/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * dir.c
 *
 * Creates, reads, walks and deletes directory-nodes
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 *
 *  Portions of this code from linux/fs/ext3/dir.c
 *
 *  Copyright (C) 1992, 1993, 1994, 1995
 *  Remy Card (card@masi.ibp.fr)
 *  Laboratoire MASI - Institut Blaise Pascal
 *  Universite Pierre et Marie Curie (Paris VI)
 *
 *   from
 *
 *   linux/fs/minix/dir.c
 *
 *   Copyright (C) 1991, 1992 Linus Torvalds
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
37 */ 38 39 #include <linux/fs.h> 40 #include <linux/types.h> 41 #include <linux/slab.h> 42 #include <linux/highmem.h> 43 44 #define MLOG_MASK_PREFIX ML_NAMEI 45 #include <cluster/masklog.h> 46 47 #include "ocfs2.h" 48 49 #include "alloc.h" 50 #include "dir.h" 51 #include "dlmglue.h" 52 #include "extent_map.h" 53 #include "file.h" 54 #include "inode.h" 55 #include "journal.h" 56 #include "namei.h" 57 #include "suballoc.h" 58 #include "super.h" 59 #include "uptodate.h" 60 61 #include "buffer_head_io.h" 62 63 #define NAMEI_RA_CHUNKS 2 64 #define NAMEI_RA_BLOCKS 4 65 #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) 66 #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) 67 68 static unsigned char ocfs2_filetype_table[] = { 69 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 70 }; 71 72 static int ocfs2_extend_dir(struct ocfs2_super *osb, 73 struct inode *dir, 74 struct buffer_head *parent_fe_bh, 75 unsigned int blocks_wanted, 76 struct buffer_head **new_de_bh); 77 static int ocfs2_do_extend_dir(struct super_block *sb, 78 handle_t *handle, 79 struct inode *dir, 80 struct buffer_head *parent_fe_bh, 81 struct ocfs2_alloc_context *data_ac, 82 struct ocfs2_alloc_context *meta_ac, 83 struct buffer_head **new_bh); 84 85 static struct buffer_head *ocfs2_bread(struct inode *inode, 86 int block, int *err, int reada) 87 { 88 struct buffer_head *bh = NULL; 89 int tmperr; 90 u64 p_blkno; 91 int readflags = 0; 92 93 if (reada) 94 readflags |= OCFS2_BH_READAHEAD; 95 96 if (((u64)block << inode->i_sb->s_blocksize_bits) >= 97 i_size_read(inode)) { 98 BUG_ON(!reada); 99 return NULL; 100 } 101 102 down_read(&OCFS2_I(inode)->ip_alloc_sem); 103 tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, 104 NULL); 105 up_read(&OCFS2_I(inode)->ip_alloc_sem); 106 if (tmperr < 0) { 107 mlog_errno(tmperr); 108 goto fail; 109 } 110 111 tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags); 112 if (tmperr < 0) 113 goto fail; 114 115 tmperr = 0; 
116 117 *err = 0; 118 return bh; 119 120 fail: 121 brelse(bh); 122 bh = NULL; 123 124 *err = -EIO; 125 return NULL; 126 } 127 128 /* 129 * bh passed here can be an inode block or a dir data block, depending 130 * on the inode inline data flag. 131 */ 132 static int ocfs2_check_dir_entry(struct inode * dir, 133 struct ocfs2_dir_entry * de, 134 struct buffer_head * bh, 135 unsigned long offset) 136 { 137 const char *error_msg = NULL; 138 const int rlen = le16_to_cpu(de->rec_len); 139 140 if (rlen < OCFS2_DIR_REC_LEN(1)) 141 error_msg = "rec_len is smaller than minimal"; 142 else if (rlen % 4 != 0) 143 error_msg = "rec_len % 4 != 0"; 144 else if (rlen < OCFS2_DIR_REC_LEN(de->name_len)) 145 error_msg = "rec_len is too small for name_len"; 146 else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) 147 error_msg = "directory entry across blocks"; 148 149 if (error_msg != NULL) 150 mlog(ML_ERROR, "bad entry in directory #%llu: %s - " 151 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n", 152 (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg, 153 offset, (unsigned long long)le64_to_cpu(de->inode), rlen, 154 de->name_len); 155 return error_msg == NULL ? 
1 : 0; 156 } 157 158 static inline int ocfs2_match(int len, 159 const char * const name, 160 struct ocfs2_dir_entry *de) 161 { 162 if (len != de->name_len) 163 return 0; 164 if (!de->inode) 165 return 0; 166 return !memcmp(name, de->name, len); 167 } 168 169 /* 170 * Returns 0 if not found, -1 on failure, and 1 on success 171 */ 172 static int inline ocfs2_search_dirblock(struct buffer_head *bh, 173 struct inode *dir, 174 const char *name, int namelen, 175 unsigned long offset, 176 char *first_de, 177 unsigned int bytes, 178 struct ocfs2_dir_entry **res_dir) 179 { 180 struct ocfs2_dir_entry *de; 181 char *dlimit, *de_buf; 182 int de_len; 183 int ret = 0; 184 185 mlog_entry_void(); 186 187 de_buf = first_de; 188 dlimit = de_buf + bytes; 189 190 while (de_buf < dlimit) { 191 /* this code is executed quadratically often */ 192 /* do minimal checking `by hand' */ 193 194 de = (struct ocfs2_dir_entry *) de_buf; 195 196 if (de_buf + namelen <= dlimit && 197 ocfs2_match(namelen, name, de)) { 198 /* found a match - just to be sure, do a full check */ 199 if (!ocfs2_check_dir_entry(dir, de, bh, offset)) { 200 ret = -1; 201 goto bail; 202 } 203 *res_dir = de; 204 ret = 1; 205 goto bail; 206 } 207 208 /* prevent looping on a bad block */ 209 de_len = le16_to_cpu(de->rec_len); 210 if (de_len <= 0) { 211 ret = -1; 212 goto bail; 213 } 214 215 de_buf += de_len; 216 offset += de_len; 217 } 218 219 bail: 220 mlog_exit(ret); 221 return ret; 222 } 223 224 static struct buffer_head *ocfs2_find_entry_id(const char *name, 225 int namelen, 226 struct inode *dir, 227 struct ocfs2_dir_entry **res_dir) 228 { 229 int ret, found; 230 struct buffer_head *di_bh = NULL; 231 struct ocfs2_dinode *di; 232 struct ocfs2_inline_data *data; 233 234 ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); 235 if (ret) { 236 mlog_errno(ret); 237 goto out; 238 } 239 240 di = (struct ocfs2_dinode *)di_bh->b_data; 241 data = &di->id2.i_data; 242 243 found = ocfs2_search_dirblock(di_bh, dir, name, 
namelen, 0, 244 data->id_data, i_size_read(dir), res_dir); 245 if (found == 1) 246 return di_bh; 247 248 brelse(di_bh); 249 out: 250 return NULL; 251 } 252 253 static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, 254 struct inode *dir, 255 struct ocfs2_dir_entry **res_dir) 256 { 257 struct super_block *sb; 258 struct buffer_head *bh_use[NAMEI_RA_SIZE]; 259 struct buffer_head *bh, *ret = NULL; 260 unsigned long start, block, b; 261 int ra_max = 0; /* Number of bh's in the readahead 262 buffer, bh_use[] */ 263 int ra_ptr = 0; /* Current index into readahead 264 buffer */ 265 int num = 0; 266 int nblocks, i, err; 267 268 mlog_entry_void(); 269 270 sb = dir->i_sb; 271 272 nblocks = i_size_read(dir) >> sb->s_blocksize_bits; 273 start = OCFS2_I(dir)->ip_dir_start_lookup; 274 if (start >= nblocks) 275 start = 0; 276 block = start; 277 278 restart: 279 do { 280 /* 281 * We deal with the read-ahead logic here. 282 */ 283 if (ra_ptr >= ra_max) { 284 /* Refill the readahead buffer */ 285 ra_ptr = 0; 286 b = block; 287 for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) { 288 /* 289 * Terminate if we reach the end of the 290 * directory and must wrap, or if our 291 * search has finished at this block. 292 */ 293 if (b >= nblocks || (num && block == start)) { 294 bh_use[ra_max] = NULL; 295 break; 296 } 297 num++; 298 299 bh = ocfs2_bread(dir, b++, &err, 1); 300 bh_use[ra_max] = bh; 301 } 302 } 303 if ((bh = bh_use[ra_ptr++]) == NULL) 304 goto next; 305 if (ocfs2_read_block(dir, block, &bh)) { 306 /* read error, skip block & hope for the best. 307 * ocfs2_read_block() has released the bh. 
*/ 308 ocfs2_error(dir->i_sb, "reading directory %llu, " 309 "offset %lu\n", 310 (unsigned long long)OCFS2_I(dir)->ip_blkno, 311 block); 312 goto next; 313 } 314 i = ocfs2_search_dirblock(bh, dir, name, namelen, 315 block << sb->s_blocksize_bits, 316 bh->b_data, sb->s_blocksize, 317 res_dir); 318 if (i == 1) { 319 OCFS2_I(dir)->ip_dir_start_lookup = block; 320 ret = bh; 321 goto cleanup_and_exit; 322 } else { 323 brelse(bh); 324 if (i < 0) 325 goto cleanup_and_exit; 326 } 327 next: 328 if (++block >= nblocks) 329 block = 0; 330 } while (block != start); 331 332 /* 333 * If the directory has grown while we were searching, then 334 * search the last part of the directory before giving up. 335 */ 336 block = nblocks; 337 nblocks = i_size_read(dir) >> sb->s_blocksize_bits; 338 if (block < nblocks) { 339 start = 0; 340 goto restart; 341 } 342 343 cleanup_and_exit: 344 /* Clean up the read-ahead blocks */ 345 for (; ra_ptr < ra_max; ra_ptr++) 346 brelse(bh_use[ra_ptr]); 347 348 mlog_exit_ptr(ret); 349 return ret; 350 } 351 352 /* 353 * Try to find an entry of the provided name within 'dir'. 354 * 355 * If nothing was found, NULL is returned. Otherwise, a buffer_head 356 * and pointer to the dir entry are passed back. 357 * 358 * Caller can NOT assume anything about the contents of the 359 * buffer_head - it is passed back only so that it can be passed into 360 * any one of the manipulation functions (add entry, delete entry, 361 * etc). As an example, bh in the extent directory case is a data 362 * block, in the inline-data case it actually points to an inode. 
363 */ 364 struct buffer_head *ocfs2_find_entry(const char *name, int namelen, 365 struct inode *dir, 366 struct ocfs2_dir_entry **res_dir) 367 { 368 *res_dir = NULL; 369 370 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 371 return ocfs2_find_entry_id(name, namelen, dir, res_dir); 372 373 return ocfs2_find_entry_el(name, namelen, dir, res_dir); 374 } 375 376 /* 377 * Update inode number and type of a previously found directory entry. 378 */ 379 int ocfs2_update_entry(struct inode *dir, handle_t *handle, 380 struct buffer_head *de_bh, struct ocfs2_dir_entry *de, 381 struct inode *new_entry_inode) 382 { 383 int ret; 384 385 /* 386 * The same code works fine for both inline-data and extent 387 * based directories, so no need to split this up. 388 */ 389 390 ret = ocfs2_journal_access(handle, dir, de_bh, 391 OCFS2_JOURNAL_ACCESS_WRITE); 392 if (ret) { 393 mlog_errno(ret); 394 goto out; 395 } 396 397 de->inode = cpu_to_le64(OCFS2_I(new_entry_inode)->ip_blkno); 398 ocfs2_set_de_type(de, new_entry_inode->i_mode); 399 400 ocfs2_journal_dirty(handle, de_bh); 401 402 out: 403 return ret; 404 } 405 406 static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, 407 struct ocfs2_dir_entry *de_del, 408 struct buffer_head *bh, char *first_de, 409 unsigned int bytes) 410 { 411 struct ocfs2_dir_entry *de, *pde; 412 int i, status = -ENOENT; 413 414 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh); 415 416 i = 0; 417 pde = NULL; 418 de = (struct ocfs2_dir_entry *) first_de; 419 while (i < bytes) { 420 if (!ocfs2_check_dir_entry(dir, de, bh, i)) { 421 status = -EIO; 422 mlog_errno(status); 423 goto bail; 424 } 425 if (de == de_del) { 426 status = ocfs2_journal_access(handle, dir, bh, 427 OCFS2_JOURNAL_ACCESS_WRITE); 428 if (status < 0) { 429 status = -EIO; 430 mlog_errno(status); 431 goto bail; 432 } 433 if (pde) 434 le16_add_cpu(&pde->rec_len, 435 le16_to_cpu(de->rec_len)); 436 else 437 de->inode = 0; 438 dir->i_version++; 439 status = 
ocfs2_journal_dirty(handle, bh); 440 goto bail; 441 } 442 i += le16_to_cpu(de->rec_len); 443 pde = de; 444 de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len)); 445 } 446 bail: 447 mlog_exit(status); 448 return status; 449 } 450 451 static inline int ocfs2_delete_entry_id(handle_t *handle, 452 struct inode *dir, 453 struct ocfs2_dir_entry *de_del, 454 struct buffer_head *bh) 455 { 456 int ret; 457 struct buffer_head *di_bh = NULL; 458 struct ocfs2_dinode *di; 459 struct ocfs2_inline_data *data; 460 461 ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); 462 if (ret) { 463 mlog_errno(ret); 464 goto out; 465 } 466 467 di = (struct ocfs2_dinode *)di_bh->b_data; 468 data = &di->id2.i_data; 469 470 ret = __ocfs2_delete_entry(handle, dir, de_del, bh, data->id_data, 471 i_size_read(dir)); 472 473 brelse(di_bh); 474 out: 475 return ret; 476 } 477 478 static inline int ocfs2_delete_entry_el(handle_t *handle, 479 struct inode *dir, 480 struct ocfs2_dir_entry *de_del, 481 struct buffer_head *bh) 482 { 483 return __ocfs2_delete_entry(handle, dir, de_del, bh, bh->b_data, 484 bh->b_size); 485 } 486 487 /* 488 * ocfs2_delete_entry deletes a directory entry by merging it with the 489 * previous entry 490 */ 491 int ocfs2_delete_entry(handle_t *handle, 492 struct inode *dir, 493 struct ocfs2_dir_entry *de_del, 494 struct buffer_head *bh) 495 { 496 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 497 return ocfs2_delete_entry_id(handle, dir, de_del, bh); 498 499 return ocfs2_delete_entry_el(handle, dir, de_del, bh); 500 } 501 502 /* 503 * Check whether 'de' has enough room to hold an entry of 504 * 'new_rec_len' bytes. 
505 */ 506 static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de, 507 unsigned int new_rec_len) 508 { 509 unsigned int de_really_used; 510 511 /* Check whether this is an empty record with enough space */ 512 if (le64_to_cpu(de->inode) == 0 && 513 le16_to_cpu(de->rec_len) >= new_rec_len) 514 return 1; 515 516 /* 517 * Record might have free space at the end which we can 518 * use. 519 */ 520 de_really_used = OCFS2_DIR_REC_LEN(de->name_len); 521 if (le16_to_cpu(de->rec_len) >= (de_really_used + new_rec_len)) 522 return 1; 523 524 return 0; 525 } 526 527 /* we don't always have a dentry for what we want to add, so people 528 * like orphan dir can call this instead. 529 * 530 * If you pass me insert_bh, I'll skip the search of the other dir 531 * blocks and put the record in there. 532 */ 533 int __ocfs2_add_entry(handle_t *handle, 534 struct inode *dir, 535 const char *name, int namelen, 536 struct inode *inode, u64 blkno, 537 struct buffer_head *parent_fe_bh, 538 struct buffer_head *insert_bh) 539 { 540 unsigned long offset; 541 unsigned short rec_len; 542 struct ocfs2_dir_entry *de, *de1; 543 struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_fe_bh->b_data; 544 struct super_block *sb = dir->i_sb; 545 int retval, status; 546 unsigned int size = sb->s_blocksize; 547 char *data_start = insert_bh->b_data; 548 549 mlog_entry_void(); 550 551 if (!namelen) 552 return -EINVAL; 553 554 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 555 data_start = di->id2.i_data.id_data; 556 size = i_size_read(dir); 557 558 BUG_ON(insert_bh != parent_fe_bh); 559 } 560 561 rec_len = OCFS2_DIR_REC_LEN(namelen); 562 offset = 0; 563 de = (struct ocfs2_dir_entry *) data_start; 564 while (1) { 565 BUG_ON((char *)de >= (size + data_start)); 566 567 /* These checks should've already been passed by the 568 * prepare function, but I guess we can leave them 569 * here anyway. 
*/ 570 if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) { 571 retval = -ENOENT; 572 goto bail; 573 } 574 if (ocfs2_match(namelen, name, de)) { 575 retval = -EEXIST; 576 goto bail; 577 } 578 579 if (ocfs2_dirent_would_fit(de, rec_len)) { 580 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 581 retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh); 582 if (retval < 0) { 583 mlog_errno(retval); 584 goto bail; 585 } 586 587 status = ocfs2_journal_access(handle, dir, insert_bh, 588 OCFS2_JOURNAL_ACCESS_WRITE); 589 /* By now the buffer is marked for journaling */ 590 offset += le16_to_cpu(de->rec_len); 591 if (le64_to_cpu(de->inode)) { 592 de1 = (struct ocfs2_dir_entry *)((char *) de + 593 OCFS2_DIR_REC_LEN(de->name_len)); 594 de1->rec_len = 595 cpu_to_le16(le16_to_cpu(de->rec_len) - 596 OCFS2_DIR_REC_LEN(de->name_len)); 597 de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len)); 598 de = de1; 599 } 600 de->file_type = OCFS2_FT_UNKNOWN; 601 if (blkno) { 602 de->inode = cpu_to_le64(blkno); 603 ocfs2_set_de_type(de, inode->i_mode); 604 } else 605 de->inode = 0; 606 de->name_len = namelen; 607 memcpy(de->name, name, namelen); 608 609 dir->i_version++; 610 status = ocfs2_journal_dirty(handle, insert_bh); 611 retval = 0; 612 goto bail; 613 } 614 offset += le16_to_cpu(de->rec_len); 615 de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len)); 616 } 617 618 /* when you think about it, the assert above should prevent us 619 * from ever getting here. 
*/ 620 retval = -ENOSPC; 621 bail: 622 623 mlog_exit(retval); 624 return retval; 625 } 626 627 static int ocfs2_dir_foreach_blk_id(struct inode *inode, 628 u64 *f_version, 629 loff_t *f_pos, void *priv, 630 filldir_t filldir, int *filldir_err) 631 { 632 int ret, i, filldir_ret; 633 unsigned long offset = *f_pos; 634 struct buffer_head *di_bh = NULL; 635 struct ocfs2_dinode *di; 636 struct ocfs2_inline_data *data; 637 struct ocfs2_dir_entry *de; 638 639 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); 640 if (ret) { 641 mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", 642 (unsigned long long)OCFS2_I(inode)->ip_blkno); 643 goto out; 644 } 645 646 di = (struct ocfs2_dinode *)di_bh->b_data; 647 data = &di->id2.i_data; 648 649 while (*f_pos < i_size_read(inode)) { 650 revalidate: 651 /* If the dir block has changed since the last call to 652 * readdir(2), then we might be pointing to an invalid 653 * dirent right now. Scan from the start of the block 654 * to make sure. */ 655 if (*f_version != inode->i_version) { 656 for (i = 0; i < i_size_read(inode) && i < offset; ) { 657 de = (struct ocfs2_dir_entry *) 658 (data->id_data + i); 659 /* It's too expensive to do a full 660 * dirent test each time round this 661 * loop, but we do have to test at 662 * least that it is non-zero. A 663 * failure will be detected in the 664 * dirent test below. */ 665 if (le16_to_cpu(de->rec_len) < 666 OCFS2_DIR_REC_LEN(1)) 667 break; 668 i += le16_to_cpu(de->rec_len); 669 } 670 *f_pos = offset = i; 671 *f_version = inode->i_version; 672 } 673 674 de = (struct ocfs2_dir_entry *) (data->id_data + *f_pos); 675 if (!ocfs2_check_dir_entry(inode, de, di_bh, *f_pos)) { 676 /* On error, skip the f_pos to the end. */ 677 *f_pos = i_size_read(inode); 678 goto out; 679 } 680 offset += le16_to_cpu(de->rec_len); 681 if (le64_to_cpu(de->inode)) { 682 /* We might block in the next section 683 * if the data destination is 684 * currently swapped out. 
So, use a 685 * version stamp to detect whether or 686 * not the directory has been modified 687 * during the copy operation. 688 */ 689 u64 version = *f_version; 690 unsigned char d_type = DT_UNKNOWN; 691 692 if (de->file_type < OCFS2_FT_MAX) 693 d_type = ocfs2_filetype_table[de->file_type]; 694 695 filldir_ret = filldir(priv, de->name, 696 de->name_len, 697 *f_pos, 698 le64_to_cpu(de->inode), 699 d_type); 700 if (filldir_ret) { 701 if (filldir_err) 702 *filldir_err = filldir_ret; 703 break; 704 } 705 if (version != *f_version) 706 goto revalidate; 707 } 708 *f_pos += le16_to_cpu(de->rec_len); 709 } 710 711 out: 712 brelse(di_bh); 713 714 return 0; 715 } 716 717 static int ocfs2_dir_foreach_blk_el(struct inode *inode, 718 u64 *f_version, 719 loff_t *f_pos, void *priv, 720 filldir_t filldir, int *filldir_err) 721 { 722 int error = 0; 723 unsigned long offset, blk, last_ra_blk = 0; 724 int i, stored; 725 struct buffer_head * bh, * tmp; 726 struct ocfs2_dir_entry * de; 727 int err; 728 struct super_block * sb = inode->i_sb; 729 unsigned int ra_sectors = 16; 730 731 stored = 0; 732 bh = NULL; 733 734 offset = (*f_pos) & (sb->s_blocksize - 1); 735 736 while (!error && !stored && *f_pos < i_size_read(inode)) { 737 blk = (*f_pos) >> sb->s_blocksize_bits; 738 bh = ocfs2_bread(inode, blk, &err, 0); 739 if (!bh) { 740 mlog(ML_ERROR, 741 "directory #%llu contains a hole at offset %lld\n", 742 (unsigned long long)OCFS2_I(inode)->ip_blkno, 743 *f_pos); 744 *f_pos += sb->s_blocksize - offset; 745 continue; 746 } 747 748 /* The idea here is to begin with 8k read-ahead and to stay 749 * 4k ahead of our current position. 750 * 751 * TODO: Use the pagecache for this. We just need to 752 * make sure it's cluster-safe... 
*/ 753 if (!last_ra_blk 754 || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) { 755 for (i = ra_sectors >> (sb->s_blocksize_bits - 9); 756 i > 0; i--) { 757 tmp = ocfs2_bread(inode, ++blk, &err, 1); 758 brelse(tmp); 759 } 760 last_ra_blk = blk; 761 ra_sectors = 8; 762 } 763 764 revalidate: 765 /* If the dir block has changed since the last call to 766 * readdir(2), then we might be pointing to an invalid 767 * dirent right now. Scan from the start of the block 768 * to make sure. */ 769 if (*f_version != inode->i_version) { 770 for (i = 0; i < sb->s_blocksize && i < offset; ) { 771 de = (struct ocfs2_dir_entry *) (bh->b_data + i); 772 /* It's too expensive to do a full 773 * dirent test each time round this 774 * loop, but we do have to test at 775 * least that it is non-zero. A 776 * failure will be detected in the 777 * dirent test below. */ 778 if (le16_to_cpu(de->rec_len) < 779 OCFS2_DIR_REC_LEN(1)) 780 break; 781 i += le16_to_cpu(de->rec_len); 782 } 783 offset = i; 784 *f_pos = ((*f_pos) & ~(sb->s_blocksize - 1)) 785 | offset; 786 *f_version = inode->i_version; 787 } 788 789 while (!error && *f_pos < i_size_read(inode) 790 && offset < sb->s_blocksize) { 791 de = (struct ocfs2_dir_entry *) (bh->b_data + offset); 792 if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { 793 /* On error, skip the f_pos to the 794 next block. */ 795 *f_pos = ((*f_pos) | (sb->s_blocksize - 1)) + 1; 796 brelse(bh); 797 goto out; 798 } 799 offset += le16_to_cpu(de->rec_len); 800 if (le64_to_cpu(de->inode)) { 801 /* We might block in the next section 802 * if the data destination is 803 * currently swapped out. So, use a 804 * version stamp to detect whether or 805 * not the directory has been modified 806 * during the copy operation. 
807 */ 808 unsigned long version = *f_version; 809 unsigned char d_type = DT_UNKNOWN; 810 811 if (de->file_type < OCFS2_FT_MAX) 812 d_type = ocfs2_filetype_table[de->file_type]; 813 error = filldir(priv, de->name, 814 de->name_len, 815 *f_pos, 816 le64_to_cpu(de->inode), 817 d_type); 818 if (error) { 819 if (filldir_err) 820 *filldir_err = error; 821 break; 822 } 823 if (version != *f_version) 824 goto revalidate; 825 stored ++; 826 } 827 *f_pos += le16_to_cpu(de->rec_len); 828 } 829 offset = 0; 830 brelse(bh); 831 } 832 833 stored = 0; 834 out: 835 return stored; 836 } 837 838 static int ocfs2_dir_foreach_blk(struct inode *inode, u64 *f_version, 839 loff_t *f_pos, void *priv, filldir_t filldir, 840 int *filldir_err) 841 { 842 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 843 return ocfs2_dir_foreach_blk_id(inode, f_version, f_pos, priv, 844 filldir, filldir_err); 845 846 return ocfs2_dir_foreach_blk_el(inode, f_version, f_pos, priv, filldir, 847 filldir_err); 848 } 849 850 /* 851 * This is intended to be called from inside other kernel functions, 852 * so we fake some arguments. 
853 */ 854 int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv, 855 filldir_t filldir) 856 { 857 int ret = 0, filldir_err = 0; 858 u64 version = inode->i_version; 859 860 while (*f_pos < i_size_read(inode)) { 861 ret = ocfs2_dir_foreach_blk(inode, &version, f_pos, priv, 862 filldir, &filldir_err); 863 if (ret || filldir_err) 864 break; 865 } 866 867 if (ret > 0) 868 ret = -EIO; 869 870 return 0; 871 } 872 873 /* 874 * ocfs2_readdir() 875 * 876 */ 877 int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) 878 { 879 int error = 0; 880 struct inode *inode = filp->f_path.dentry->d_inode; 881 int lock_level = 0; 882 883 mlog_entry("dirino=%llu\n", 884 (unsigned long long)OCFS2_I(inode)->ip_blkno); 885 886 error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); 887 if (lock_level && error >= 0) { 888 /* We release EX lock which used to update atime 889 * and get PR lock again to reduce contention 890 * on commonly accessed directories. */ 891 ocfs2_inode_unlock(inode, 1); 892 lock_level = 0; 893 error = ocfs2_inode_lock(inode, NULL, 0); 894 } 895 if (error < 0) { 896 if (error != -ENOENT) 897 mlog_errno(error); 898 /* we haven't got any yet, so propagate the error. */ 899 goto bail_nolock; 900 } 901 902 error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos, 903 dirent, filldir, NULL); 904 905 ocfs2_inode_unlock(inode, lock_level); 906 907 bail_nolock: 908 mlog_exit(error); 909 910 return error; 911 } 912 913 /* 914 * NOTE: this should always be called with parent dir i_mutex taken. 
915 */ 916 int ocfs2_find_files_on_disk(const char *name, 917 int namelen, 918 u64 *blkno, 919 struct inode *inode, 920 struct buffer_head **dirent_bh, 921 struct ocfs2_dir_entry **dirent) 922 { 923 int status = -ENOENT; 924 925 mlog_entry("(name=%.*s, blkno=%p, inode=%p, dirent_bh=%p, dirent=%p)\n", 926 namelen, name, blkno, inode, dirent_bh, dirent); 927 928 *dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent); 929 if (!*dirent_bh || !*dirent) { 930 status = -ENOENT; 931 goto leave; 932 } 933 934 *blkno = le64_to_cpu((*dirent)->inode); 935 936 status = 0; 937 leave: 938 if (status < 0) { 939 *dirent = NULL; 940 brelse(*dirent_bh); 941 *dirent_bh = NULL; 942 } 943 944 mlog_exit(status); 945 return status; 946 } 947 948 /* 949 * Convenience function for callers which just want the block number 950 * mapped to a name and don't require the full dirent info, etc. 951 */ 952 int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name, 953 int namelen, u64 *blkno) 954 { 955 int ret; 956 struct buffer_head *bh = NULL; 957 struct ocfs2_dir_entry *dirent = NULL; 958 959 ret = ocfs2_find_files_on_disk(name, namelen, blkno, dir, &bh, &dirent); 960 brelse(bh); 961 962 return ret; 963 } 964 965 /* Check for a name within a directory. 
966 * 967 * Return 0 if the name does not exist 968 * Return -EEXIST if the directory contains the name 969 * 970 * Callers should have i_mutex + a cluster lock on dir 971 */ 972 int ocfs2_check_dir_for_entry(struct inode *dir, 973 const char *name, 974 int namelen) 975 { 976 int ret; 977 struct buffer_head *dirent_bh = NULL; 978 struct ocfs2_dir_entry *dirent = NULL; 979 980 mlog_entry("dir %llu, name '%.*s'\n", 981 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); 982 983 ret = -EEXIST; 984 dirent_bh = ocfs2_find_entry(name, namelen, dir, &dirent); 985 if (dirent_bh) 986 goto bail; 987 988 ret = 0; 989 bail: 990 brelse(dirent_bh); 991 992 mlog_exit(ret); 993 return ret; 994 } 995 996 struct ocfs2_empty_dir_priv { 997 unsigned seen_dot; 998 unsigned seen_dot_dot; 999 unsigned seen_other; 1000 }; 1001 static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, 1002 loff_t pos, u64 ino, unsigned type) 1003 { 1004 struct ocfs2_empty_dir_priv *p = priv; 1005 1006 /* 1007 * Check the positions of "." and ".." records to be sure 1008 * they're in the correct place. 1009 */ 1010 if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) { 1011 p->seen_dot = 1; 1012 return 0; 1013 } 1014 1015 if (name_len == 2 && !strncmp("..", name, 2) && 1016 pos == OCFS2_DIR_REC_LEN(1)) { 1017 p->seen_dot_dot = 1; 1018 return 0; 1019 } 1020 1021 p->seen_other = 1; 1022 return 1; 1023 } 1024 /* 1025 * routine to check that the specified directory is empty (for rmdir) 1026 * 1027 * Returns 1 if dir is empty, zero otherwise. 1028 */ 1029 int ocfs2_empty_dir(struct inode *inode) 1030 { 1031 int ret; 1032 loff_t start = 0; 1033 struct ocfs2_empty_dir_priv priv; 1034 1035 memset(&priv, 0, sizeof(priv)); 1036 1037 ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir); 1038 if (ret) 1039 mlog_errno(ret); 1040 1041 if (!priv.seen_dot || !priv.seen_dot_dot) { 1042 mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' 
or `..'\n", 1043 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1044 /* 1045 * XXX: Is it really safe to allow an unlink to continue? 1046 */ 1047 return 1; 1048 } 1049 1050 return !priv.seen_other; 1051 } 1052 1053 static void ocfs2_fill_initial_dirents(struct inode *inode, 1054 struct inode *parent, 1055 char *start, unsigned int size) 1056 { 1057 struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start; 1058 1059 de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno); 1060 de->name_len = 1; 1061 de->rec_len = 1062 cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len)); 1063 strcpy(de->name, "."); 1064 ocfs2_set_de_type(de, S_IFDIR); 1065 1066 de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len)); 1067 de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno); 1068 de->rec_len = cpu_to_le16(size - OCFS2_DIR_REC_LEN(1)); 1069 de->name_len = 2; 1070 strcpy(de->name, ".."); 1071 ocfs2_set_de_type(de, S_IFDIR); 1072 } 1073 1074 /* 1075 * This works together with code in ocfs2_mknod_locked() which sets 1076 * the inline-data flag and initializes the inline-data section. 
1077 */ 1078 static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb, 1079 handle_t *handle, 1080 struct inode *parent, 1081 struct inode *inode, 1082 struct buffer_head *di_bh) 1083 { 1084 int ret; 1085 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1086 struct ocfs2_inline_data *data = &di->id2.i_data; 1087 unsigned int size = le16_to_cpu(data->id_count); 1088 1089 ret = ocfs2_journal_access(handle, inode, di_bh, 1090 OCFS2_JOURNAL_ACCESS_WRITE); 1091 if (ret) { 1092 mlog_errno(ret); 1093 goto out; 1094 } 1095 1096 ocfs2_fill_initial_dirents(inode, parent, data->id_data, size); 1097 1098 ocfs2_journal_dirty(handle, di_bh); 1099 if (ret) { 1100 mlog_errno(ret); 1101 goto out; 1102 } 1103 1104 i_size_write(inode, size); 1105 inode->i_nlink = 2; 1106 inode->i_blocks = ocfs2_inode_sector_count(inode); 1107 1108 ret = ocfs2_mark_inode_dirty(handle, inode, di_bh); 1109 if (ret < 0) 1110 mlog_errno(ret); 1111 1112 out: 1113 return ret; 1114 } 1115 1116 static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, 1117 handle_t *handle, 1118 struct inode *parent, 1119 struct inode *inode, 1120 struct buffer_head *fe_bh, 1121 struct ocfs2_alloc_context *data_ac) 1122 { 1123 int status; 1124 struct buffer_head *new_bh = NULL; 1125 1126 mlog_entry_void(); 1127 1128 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, 1129 data_ac, NULL, &new_bh); 1130 if (status < 0) { 1131 mlog_errno(status); 1132 goto bail; 1133 } 1134 1135 ocfs2_set_new_buffer_uptodate(inode, new_bh); 1136 1137 status = ocfs2_journal_access(handle, inode, new_bh, 1138 OCFS2_JOURNAL_ACCESS_CREATE); 1139 if (status < 0) { 1140 mlog_errno(status); 1141 goto bail; 1142 } 1143 memset(new_bh->b_data, 0, osb->sb->s_blocksize); 1144 1145 ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, 1146 osb->sb->s_blocksize); 1147 1148 status = ocfs2_journal_dirty(handle, new_bh); 1149 if (status < 0) { 1150 mlog_errno(status); 1151 goto bail; 1152 } 1153 1154 i_size_write(inode, 
inode->i_sb->s_blocksize); 1155 inode->i_nlink = 2; 1156 inode->i_blocks = ocfs2_inode_sector_count(inode); 1157 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); 1158 if (status < 0) { 1159 mlog_errno(status); 1160 goto bail; 1161 } 1162 1163 status = 0; 1164 bail: 1165 brelse(new_bh); 1166 1167 mlog_exit(status); 1168 return status; 1169 } 1170 1171 int ocfs2_fill_new_dir(struct ocfs2_super *osb, 1172 handle_t *handle, 1173 struct inode *parent, 1174 struct inode *inode, 1175 struct buffer_head *fe_bh, 1176 struct ocfs2_alloc_context *data_ac) 1177 { 1178 BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL); 1179 1180 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 1181 return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh); 1182 1183 return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh, 1184 data_ac); 1185 } 1186 1187 static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, 1188 unsigned int new_size) 1189 { 1190 struct ocfs2_dir_entry *de; 1191 struct ocfs2_dir_entry *prev_de; 1192 char *de_buf, *limit; 1193 unsigned int bytes = new_size - old_size; 1194 1195 limit = start + old_size; 1196 de_buf = start; 1197 de = (struct ocfs2_dir_entry *)de_buf; 1198 do { 1199 prev_de = de; 1200 de_buf += le16_to_cpu(de->rec_len); 1201 de = (struct ocfs2_dir_entry *)de_buf; 1202 } while (de_buf < limit); 1203 1204 le16_add_cpu(&prev_de->rec_len, bytes); 1205 } 1206 1207 /* 1208 * We allocate enough clusters to fulfill "blocks_wanted", but set 1209 * i_size to exactly one block. Ocfs2_extend_dir() will handle the 1210 * rest automatically for us. 1211 * 1212 * *first_block_bh is a pointer to the 1st data block allocated to the 1213 * directory. 
1214 */ 1215 static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, 1216 unsigned int blocks_wanted, 1217 struct buffer_head **first_block_bh) 1218 { 1219 int ret, credits = OCFS2_INLINE_TO_EXTENTS_CREDITS; 1220 u32 alloc, bit_off, len; 1221 struct super_block *sb = dir->i_sb; 1222 u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits; 1223 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 1224 struct ocfs2_inode_info *oi = OCFS2_I(dir); 1225 struct ocfs2_alloc_context *data_ac; 1226 struct buffer_head *dirdata_bh = NULL; 1227 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1228 handle_t *handle; 1229 struct ocfs2_extent_tree et; 1230 1231 ocfs2_init_dinode_extent_tree(&et, dir, di_bh); 1232 1233 alloc = ocfs2_clusters_for_bytes(sb, bytes); 1234 1235 /* 1236 * We should never need more than 2 clusters for this - 1237 * maximum dirent size is far less than one block. In fact, 1238 * the only time we'd need more than one cluster is if 1239 * blocksize == clustersize and the dirent won't fit in the 1240 * extra space that the expansion to a single block gives. As 1241 * of today, that only happens on 4k/4k file systems. 1242 */ 1243 BUG_ON(alloc > 2); 1244 1245 ret = ocfs2_reserve_clusters(osb, alloc, &data_ac); 1246 if (ret) { 1247 mlog_errno(ret); 1248 goto out; 1249 } 1250 1251 down_write(&oi->ip_alloc_sem); 1252 1253 /* 1254 * Prepare for worst case allocation scenario of two separate 1255 * extents. 1256 */ 1257 if (alloc == 2) 1258 credits += OCFS2_SUBALLOC_ALLOC; 1259 1260 handle = ocfs2_start_trans(osb, credits); 1261 if (IS_ERR(handle)) { 1262 ret = PTR_ERR(handle); 1263 mlog_errno(ret); 1264 goto out_sem; 1265 } 1266 1267 /* 1268 * Try to claim as many clusters as the bitmap can give though 1269 * if we only get one now, that's enough to continue. The rest 1270 * will be claimed after the conversion to extents. 
1271 */ 1272 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len); 1273 if (ret) { 1274 mlog_errno(ret); 1275 goto out_commit; 1276 } 1277 1278 /* 1279 * Operations are carefully ordered so that we set up the new 1280 * data block first. The conversion from inline data to 1281 * extents follows. 1282 */ 1283 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); 1284 dirdata_bh = sb_getblk(sb, blkno); 1285 if (!dirdata_bh) { 1286 ret = -EIO; 1287 mlog_errno(ret); 1288 goto out_commit; 1289 } 1290 1291 ocfs2_set_new_buffer_uptodate(dir, dirdata_bh); 1292 1293 ret = ocfs2_journal_access(handle, dir, dirdata_bh, 1294 OCFS2_JOURNAL_ACCESS_CREATE); 1295 if (ret) { 1296 mlog_errno(ret); 1297 goto out_commit; 1298 } 1299 1300 memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir)); 1301 memset(dirdata_bh->b_data + i_size_read(dir), 0, 1302 sb->s_blocksize - i_size_read(dir)); 1303 ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), 1304 sb->s_blocksize); 1305 1306 ret = ocfs2_journal_dirty(handle, dirdata_bh); 1307 if (ret) { 1308 mlog_errno(ret); 1309 goto out_commit; 1310 } 1311 1312 /* 1313 * Set extent, i_size, etc on the directory. After this, the 1314 * inode should contain the same exact dirents as before and 1315 * be fully accessible from system calls. 1316 * 1317 * We let the later dirent insert modify c/mtime - to the user 1318 * the data hasn't changed. 
1319 */ 1320 ret = ocfs2_journal_access(handle, dir, di_bh, 1321 OCFS2_JOURNAL_ACCESS_CREATE); 1322 if (ret) { 1323 mlog_errno(ret); 1324 goto out_commit; 1325 } 1326 1327 spin_lock(&oi->ip_lock); 1328 oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL; 1329 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 1330 spin_unlock(&oi->ip_lock); 1331 1332 ocfs2_dinode_new_extent_list(dir, di); 1333 1334 i_size_write(dir, sb->s_blocksize); 1335 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 1336 1337 di->i_size = cpu_to_le64(sb->s_blocksize); 1338 di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); 1339 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); 1340 1341 /* 1342 * This should never fail as our extent list is empty and all 1343 * related blocks have been journaled already. 1344 */ 1345 ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len, 1346 0, NULL); 1347 if (ret) { 1348 mlog_errno(ret); 1349 goto out_commit; 1350 } 1351 1352 /* 1353 * Set i_blocks after the extent insert for the most up to 1354 * date ip_clusters value. 1355 */ 1356 dir->i_blocks = ocfs2_inode_sector_count(dir); 1357 1358 ret = ocfs2_journal_dirty(handle, di_bh); 1359 if (ret) { 1360 mlog_errno(ret); 1361 goto out_commit; 1362 } 1363 1364 /* 1365 * We asked for two clusters, but only got one in the 1st 1366 * pass. Claim the 2nd cluster as a separate extent. 
1367 */ 1368 if (alloc > len) { 1369 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, 1370 &len); 1371 if (ret) { 1372 mlog_errno(ret); 1373 goto out_commit; 1374 } 1375 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); 1376 1377 ret = ocfs2_insert_extent(osb, handle, dir, &et, 1, 1378 blkno, len, 0, NULL); 1379 if (ret) { 1380 mlog_errno(ret); 1381 goto out_commit; 1382 } 1383 } 1384 1385 *first_block_bh = dirdata_bh; 1386 dirdata_bh = NULL; 1387 1388 out_commit: 1389 ocfs2_commit_trans(osb, handle); 1390 1391 out_sem: 1392 up_write(&oi->ip_alloc_sem); 1393 1394 out: 1395 if (data_ac) 1396 ocfs2_free_alloc_context(data_ac); 1397 1398 brelse(dirdata_bh); 1399 1400 return ret; 1401 } 1402 1403 /* returns a bh of the 1st new block in the allocation. */ 1404 static int ocfs2_do_extend_dir(struct super_block *sb, 1405 handle_t *handle, 1406 struct inode *dir, 1407 struct buffer_head *parent_fe_bh, 1408 struct ocfs2_alloc_context *data_ac, 1409 struct ocfs2_alloc_context *meta_ac, 1410 struct buffer_head **new_bh) 1411 { 1412 int status; 1413 int extend; 1414 u64 p_blkno, v_blkno; 1415 1416 spin_lock(&OCFS2_I(dir)->ip_lock); 1417 extend = (i_size_read(dir) == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)); 1418 spin_unlock(&OCFS2_I(dir)->ip_lock); 1419 1420 if (extend) { 1421 u32 offset = OCFS2_I(dir)->ip_clusters; 1422 1423 status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset, 1424 1, 0, parent_fe_bh, handle, 1425 data_ac, meta_ac, NULL); 1426 BUG_ON(status == -EAGAIN); 1427 if (status < 0) { 1428 mlog_errno(status); 1429 goto bail; 1430 } 1431 } 1432 1433 v_blkno = ocfs2_blocks_for_bytes(sb, i_size_read(dir)); 1434 status = ocfs2_extent_map_get_blocks(dir, v_blkno, &p_blkno, NULL, NULL); 1435 if (status < 0) { 1436 mlog_errno(status); 1437 goto bail; 1438 } 1439 1440 *new_bh = sb_getblk(sb, p_blkno); 1441 if (!*new_bh) { 1442 status = -EIO; 1443 mlog_errno(status); 1444 goto bail; 1445 } 1446 status = 0; 1447 bail: 1448 mlog_exit(status); 
1449 return status; 1450 } 1451 1452 /* 1453 * Assumes you already have a cluster lock on the directory. 1454 * 1455 * 'blocks_wanted' is only used if we have an inline directory which 1456 * is to be turned into an extent based one. The size of the dirent to 1457 * insert might be larger than the space gained by growing to just one 1458 * block, so we may have to grow the inode by two blocks in that case. 1459 */ 1460 static int ocfs2_extend_dir(struct ocfs2_super *osb, 1461 struct inode *dir, 1462 struct buffer_head *parent_fe_bh, 1463 unsigned int blocks_wanted, 1464 struct buffer_head **new_de_bh) 1465 { 1466 int status = 0; 1467 int credits, num_free_extents, drop_alloc_sem = 0; 1468 loff_t dir_i_size; 1469 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; 1470 struct ocfs2_extent_list *el = &fe->id2.i_list; 1471 struct ocfs2_alloc_context *data_ac = NULL; 1472 struct ocfs2_alloc_context *meta_ac = NULL; 1473 handle_t *handle = NULL; 1474 struct buffer_head *new_bh = NULL; 1475 struct ocfs2_dir_entry * de; 1476 struct super_block *sb = osb->sb; 1477 struct ocfs2_extent_tree et; 1478 1479 mlog_entry_void(); 1480 1481 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 1482 status = ocfs2_expand_inline_dir(dir, parent_fe_bh, 1483 blocks_wanted, &new_bh); 1484 if (status) { 1485 mlog_errno(status); 1486 goto bail; 1487 } 1488 1489 if (blocks_wanted == 1) { 1490 /* 1491 * If the new dirent will fit inside the space 1492 * created by pushing out to one block, then 1493 * we can complete the operation 1494 * here. Otherwise we have to expand i_size 1495 * and format the 2nd block below. 1496 */ 1497 BUG_ON(new_bh == NULL); 1498 goto bail_bh; 1499 } 1500 1501 /* 1502 * Get rid of 'new_bh' - we want to format the 2nd 1503 * data block and return that instead. 
1504 */ 1505 brelse(new_bh); 1506 new_bh = NULL; 1507 1508 dir_i_size = i_size_read(dir); 1509 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; 1510 goto do_extend; 1511 } 1512 1513 dir_i_size = i_size_read(dir); 1514 mlog(0, "extending dir %llu (i_size = %lld)\n", 1515 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size); 1516 1517 /* dir->i_size is always block aligned. */ 1518 spin_lock(&OCFS2_I(dir)->ip_lock); 1519 if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { 1520 spin_unlock(&OCFS2_I(dir)->ip_lock); 1521 ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh); 1522 num_free_extents = ocfs2_num_free_extents(osb, dir, &et); 1523 if (num_free_extents < 0) { 1524 status = num_free_extents; 1525 mlog_errno(status); 1526 goto bail; 1527 } 1528 1529 if (!num_free_extents) { 1530 status = ocfs2_reserve_new_metadata(osb, el, &meta_ac); 1531 if (status < 0) { 1532 if (status != -ENOSPC) 1533 mlog_errno(status); 1534 goto bail; 1535 } 1536 } 1537 1538 status = ocfs2_reserve_clusters(osb, 1, &data_ac); 1539 if (status < 0) { 1540 if (status != -ENOSPC) 1541 mlog_errno(status); 1542 goto bail; 1543 } 1544 1545 credits = ocfs2_calc_extend_credits(sb, el, 1); 1546 } else { 1547 spin_unlock(&OCFS2_I(dir)->ip_lock); 1548 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; 1549 } 1550 1551 do_extend: 1552 down_write(&OCFS2_I(dir)->ip_alloc_sem); 1553 drop_alloc_sem = 1; 1554 1555 handle = ocfs2_start_trans(osb, credits); 1556 if (IS_ERR(handle)) { 1557 status = PTR_ERR(handle); 1558 handle = NULL; 1559 mlog_errno(status); 1560 goto bail; 1561 } 1562 1563 status = ocfs2_do_extend_dir(osb->sb, handle, dir, parent_fe_bh, 1564 data_ac, meta_ac, &new_bh); 1565 if (status < 0) { 1566 mlog_errno(status); 1567 goto bail; 1568 } 1569 1570 ocfs2_set_new_buffer_uptodate(dir, new_bh); 1571 1572 status = ocfs2_journal_access(handle, dir, new_bh, 1573 OCFS2_JOURNAL_ACCESS_CREATE); 1574 if (status < 0) { 1575 mlog_errno(status); 1576 goto bail; 1577 } 1578 
memset(new_bh->b_data, 0, sb->s_blocksize); 1579 de = (struct ocfs2_dir_entry *) new_bh->b_data; 1580 de->inode = 0; 1581 de->rec_len = cpu_to_le16(sb->s_blocksize); 1582 status = ocfs2_journal_dirty(handle, new_bh); 1583 if (status < 0) { 1584 mlog_errno(status); 1585 goto bail; 1586 } 1587 1588 dir_i_size += dir->i_sb->s_blocksize; 1589 i_size_write(dir, dir_i_size); 1590 dir->i_blocks = ocfs2_inode_sector_count(dir); 1591 status = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh); 1592 if (status < 0) { 1593 mlog_errno(status); 1594 goto bail; 1595 } 1596 1597 bail_bh: 1598 *new_de_bh = new_bh; 1599 get_bh(*new_de_bh); 1600 bail: 1601 if (drop_alloc_sem) 1602 up_write(&OCFS2_I(dir)->ip_alloc_sem); 1603 if (handle) 1604 ocfs2_commit_trans(osb, handle); 1605 1606 if (data_ac) 1607 ocfs2_free_alloc_context(data_ac); 1608 if (meta_ac) 1609 ocfs2_free_alloc_context(meta_ac); 1610 1611 brelse(new_bh); 1612 1613 mlog_exit(status); 1614 return status; 1615 } 1616 1617 static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, 1618 const char *name, int namelen, 1619 struct buffer_head **ret_de_bh, 1620 unsigned int *blocks_wanted) 1621 { 1622 int ret; 1623 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1624 struct ocfs2_dir_entry *de, *last_de = NULL; 1625 char *de_buf, *limit; 1626 unsigned long offset = 0; 1627 unsigned int rec_len, new_rec_len; 1628 1629 de_buf = di->id2.i_data.id_data; 1630 limit = de_buf + i_size_read(dir); 1631 rec_len = OCFS2_DIR_REC_LEN(namelen); 1632 1633 while (de_buf < limit) { 1634 de = (struct ocfs2_dir_entry *)de_buf; 1635 1636 if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) { 1637 ret = -ENOENT; 1638 goto out; 1639 } 1640 if (ocfs2_match(namelen, name, de)) { 1641 ret = -EEXIST; 1642 goto out; 1643 } 1644 if (ocfs2_dirent_would_fit(de, rec_len)) { 1645 /* Ok, we found a spot. Return this bh and let 1646 * the caller actually fill it in. 
*/ 1647 *ret_de_bh = di_bh; 1648 get_bh(*ret_de_bh); 1649 ret = 0; 1650 goto out; 1651 } 1652 1653 last_de = de; 1654 de_buf += le16_to_cpu(de->rec_len); 1655 offset += le16_to_cpu(de->rec_len); 1656 } 1657 1658 /* 1659 * We're going to require expansion of the directory - figure 1660 * out how many blocks we'll need so that a place for the 1661 * dirent can be found. 1662 */ 1663 *blocks_wanted = 1; 1664 new_rec_len = le16_to_cpu(last_de->rec_len) + (dir->i_sb->s_blocksize - i_size_read(dir)); 1665 if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len))) 1666 *blocks_wanted = 2; 1667 1668 ret = -ENOSPC; 1669 out: 1670 return ret; 1671 } 1672 1673 static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, 1674 int namelen, struct buffer_head **ret_de_bh) 1675 { 1676 unsigned long offset; 1677 struct buffer_head *bh = NULL; 1678 unsigned short rec_len; 1679 struct ocfs2_dir_entry *de; 1680 struct super_block *sb = dir->i_sb; 1681 int status; 1682 1683 bh = ocfs2_bread(dir, 0, &status, 0); 1684 if (!bh) { 1685 mlog_errno(status); 1686 goto bail; 1687 } 1688 1689 rec_len = OCFS2_DIR_REC_LEN(namelen); 1690 offset = 0; 1691 de = (struct ocfs2_dir_entry *) bh->b_data; 1692 while (1) { 1693 if ((char *)de >= sb->s_blocksize + bh->b_data) { 1694 brelse(bh); 1695 bh = NULL; 1696 1697 if (i_size_read(dir) <= offset) { 1698 /* 1699 * Caller will have to expand this 1700 * directory. 1701 */ 1702 status = -ENOSPC; 1703 goto bail; 1704 } 1705 bh = ocfs2_bread(dir, 1706 offset >> sb->s_blocksize_bits, 1707 &status, 1708 0); 1709 if (!bh) { 1710 mlog_errno(status); 1711 goto bail; 1712 } 1713 /* move to next block */ 1714 de = (struct ocfs2_dir_entry *) bh->b_data; 1715 } 1716 if (!ocfs2_check_dir_entry(dir, de, bh, offset)) { 1717 status = -ENOENT; 1718 goto bail; 1719 } 1720 if (ocfs2_match(namelen, name, de)) { 1721 status = -EEXIST; 1722 goto bail; 1723 } 1724 if (ocfs2_dirent_would_fit(de, rec_len)) { 1725 /* Ok, we found a spot. 
Return this bh and let 1726 * the caller actually fill it in. */ 1727 *ret_de_bh = bh; 1728 get_bh(*ret_de_bh); 1729 status = 0; 1730 goto bail; 1731 } 1732 offset += le16_to_cpu(de->rec_len); 1733 de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len)); 1734 } 1735 1736 status = 0; 1737 bail: 1738 brelse(bh); 1739 1740 mlog_exit(status); 1741 return status; 1742 } 1743 1744 int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, 1745 struct inode *dir, 1746 struct buffer_head *parent_fe_bh, 1747 const char *name, 1748 int namelen, 1749 struct buffer_head **ret_de_bh) 1750 { 1751 int ret; 1752 unsigned int blocks_wanted = 1; 1753 struct buffer_head *bh = NULL; 1754 1755 mlog(0, "getting ready to insert namelen %d into dir %llu\n", 1756 namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno); 1757 1758 *ret_de_bh = NULL; 1759 1760 if (!namelen) { 1761 ret = -EINVAL; 1762 mlog_errno(ret); 1763 goto out; 1764 } 1765 1766 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 1767 ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name, 1768 namelen, &bh, &blocks_wanted); 1769 } else 1770 ret = ocfs2_find_dir_space_el(dir, name, namelen, &bh); 1771 1772 if (ret && ret != -ENOSPC) { 1773 mlog_errno(ret); 1774 goto out; 1775 } 1776 1777 if (ret == -ENOSPC) { 1778 /* 1779 * We have to expand the directory to add this name. 1780 */ 1781 BUG_ON(bh); 1782 1783 ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted, 1784 &bh); 1785 if (ret) { 1786 if (ret != -ENOSPC) 1787 mlog_errno(ret); 1788 goto out; 1789 } 1790 1791 BUG_ON(!bh); 1792 } 1793 1794 *ret_de_bh = bh; 1795 bh = NULL; 1796 out: 1797 brelse(bh); 1798 return ret; 1799 } 1800