1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * xattr.c 5 * 6 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 7 * 8 * CREDITS: 9 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public 14 * License version 2 as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 */ 21 22 #include <linux/capability.h> 23 #include <linux/fs.h> 24 #include <linux/types.h> 25 #include <linux/slab.h> 26 #include <linux/highmem.h> 27 #include <linux/pagemap.h> 28 #include <linux/uio.h> 29 #include <linux/sched.h> 30 #include <linux/splice.h> 31 #include <linux/mount.h> 32 #include <linux/writeback.h> 33 #include <linux/falloc.h> 34 #include <linux/sort.h> 35 #include <linux/init.h> 36 #include <linux/module.h> 37 #include <linux/string.h> 38 #include <linux/security.h> 39 40 #define MLOG_MASK_PREFIX ML_XATTR 41 #include <cluster/masklog.h> 42 43 #include "ocfs2.h" 44 #include "alloc.h" 45 #include "blockcheck.h" 46 #include "dlmglue.h" 47 #include "file.h" 48 #include "symlink.h" 49 #include "sysfile.h" 50 #include "inode.h" 51 #include "journal.h" 52 #include "ocfs2_fs.h" 53 #include "suballoc.h" 54 #include "uptodate.h" 55 #include "buffer_head_io.h" 56 #include "super.h" 57 #include "xattr.h" 58 #include "refcounttree.h" 59 #include "acl.h" 60 61 struct ocfs2_xattr_def_value_root { 62 struct ocfs2_xattr_value_root xv; 63 struct ocfs2_extent_rec er; 64 }; 65 66 struct ocfs2_xattr_bucket { 67 /* The inode these xattrs are associated with */ 68 struct inode *bu_inode; 69 70 /* The actual 
buffers that make up the bucket */ 71 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 72 73 /* How many blocks make up one bucket for this filesystem */ 74 int bu_blocks; 75 }; 76 77 struct ocfs2_xattr_set_ctxt { 78 handle_t *handle; 79 struct ocfs2_alloc_context *meta_ac; 80 struct ocfs2_alloc_context *data_ac; 81 struct ocfs2_cached_dealloc_ctxt dealloc; 82 int set_abort; 83 }; 84 85 #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 86 #define OCFS2_XATTR_INLINE_SIZE 80 87 #define OCFS2_XATTR_HEADER_GAP 4 88 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 89 - sizeof(struct ocfs2_xattr_header) \ 90 - OCFS2_XATTR_HEADER_GAP) 91 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 92 - sizeof(struct ocfs2_xattr_block) \ 93 - sizeof(struct ocfs2_xattr_header) \ 94 - OCFS2_XATTR_HEADER_GAP) 95 96 static struct ocfs2_xattr_def_value_root def_xv = { 97 .xv.xr_list.l_count = cpu_to_le16(1), 98 }; 99 100 const struct xattr_handler *ocfs2_xattr_handlers[] = { 101 &ocfs2_xattr_user_handler, 102 &ocfs2_xattr_acl_access_handler, 103 &ocfs2_xattr_acl_default_handler, 104 &ocfs2_xattr_trusted_handler, 105 &ocfs2_xattr_security_handler, 106 NULL 107 }; 108 109 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 110 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 111 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 112 = &ocfs2_xattr_acl_access_handler, 113 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 114 = &ocfs2_xattr_acl_default_handler, 115 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 116 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 117 }; 118 119 struct ocfs2_xattr_info { 120 int xi_name_index; 121 const char *xi_name; 122 int xi_name_len; 123 const void *xi_value; 124 size_t xi_value_len; 125 }; 126 127 struct ocfs2_xattr_search { 128 struct buffer_head *inode_bh; 129 /* 130 * xattr_bh point to the block buffer head which has extended attribute 131 * when 
extended attribute in inode, xattr_bh is equal to inode_bh. 132 */ 133 struct buffer_head *xattr_bh; 134 struct ocfs2_xattr_header *header; 135 struct ocfs2_xattr_bucket *bucket; 136 void *base; 137 void *end; 138 struct ocfs2_xattr_entry *here; 139 int not_found; 140 }; 141 142 /* Operations on struct ocfs2_xa_entry */ 143 struct ocfs2_xa_loc; 144 struct ocfs2_xa_loc_operations { 145 /* 146 * Journal functions 147 */ 148 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 149 int type); 150 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 151 152 /* 153 * Return a pointer to the appropriate buffer in loc->xl_storage 154 * at the given offset from loc->xl_header. 155 */ 156 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 157 158 /* Can we reuse the existing entry for the new value? */ 159 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 160 struct ocfs2_xattr_info *xi); 161 162 /* How much space is needed for the new value? */ 163 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 164 struct ocfs2_xattr_info *xi); 165 166 /* 167 * Return the offset of the first name+value pair. This is 168 * the start of our downward-filling free space. 169 */ 170 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 171 172 /* 173 * Remove the name+value at this location. Do whatever is 174 * appropriate with the remaining name+value pairs. 175 */ 176 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 177 178 /* Fill xl_entry with a new entry */ 179 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 180 181 /* Add name+value storage to an entry */ 182 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 183 184 /* 185 * Initialize the value buf's access and bh fields for this entry. 186 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 187 */ 188 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 189 struct ocfs2_xattr_value_buf *vb); 190 }; 191 192 /* 193 * Describes an xattr entry location. 
This is a memory structure 194 * tracking the on-disk structure. 195 */ 196 struct ocfs2_xa_loc { 197 /* This xattr belongs to this inode */ 198 struct inode *xl_inode; 199 200 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 201 struct ocfs2_xattr_header *xl_header; 202 203 /* Bytes from xl_header to the end of the storage */ 204 int xl_size; 205 206 /* 207 * The ocfs2_xattr_entry this location describes. If this is 208 * NULL, this location describes the on-disk structure where it 209 * would have been. 210 */ 211 struct ocfs2_xattr_entry *xl_entry; 212 213 /* 214 * Internal housekeeping 215 */ 216 217 /* Buffer(s) containing this entry */ 218 void *xl_storage; 219 220 /* Operations on the storage backing this location */ 221 const struct ocfs2_xa_loc_operations *xl_ops; 222 }; 223 224 /* 225 * Convenience functions to calculate how much space is needed for a 226 * given name+value pair 227 */ 228 static int namevalue_size(int name_len, uint64_t value_len) 229 { 230 if (value_len > OCFS2_XATTR_INLINE_SIZE) 231 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 232 else 233 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 234 } 235 236 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 237 { 238 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 239 } 240 241 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 242 { 243 u64 value_len = le64_to_cpu(xe->xe_value_size); 244 245 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 246 ocfs2_xattr_is_local(xe)); 247 return namevalue_size(xe->xe_name_len, value_len); 248 } 249 250 251 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 252 struct ocfs2_xattr_header *xh, 253 int index, 254 int *block_off, 255 int *new_offset); 256 257 static int ocfs2_xattr_block_find(struct inode *inode, 258 int name_index, 259 const char *name, 260 struct ocfs2_xattr_search *xs); 261 static int ocfs2_xattr_index_block_find(struct inode *inode, 262 struct 
buffer_head *root_bh, 263 int name_index, 264 const char *name, 265 struct ocfs2_xattr_search *xs); 266 267 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 268 struct buffer_head *blk_bh, 269 char *buffer, 270 size_t buffer_size); 271 272 static int ocfs2_xattr_create_index_block(struct inode *inode, 273 struct ocfs2_xattr_search *xs, 274 struct ocfs2_xattr_set_ctxt *ctxt); 275 276 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 277 struct ocfs2_xattr_info *xi, 278 struct ocfs2_xattr_search *xs, 279 struct ocfs2_xattr_set_ctxt *ctxt); 280 281 typedef int (xattr_tree_rec_func)(struct inode *inode, 282 struct buffer_head *root_bh, 283 u64 blkno, u32 cpos, u32 len, void *para); 284 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 285 struct buffer_head *root_bh, 286 xattr_tree_rec_func *rec_func, 287 void *para); 288 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 289 struct ocfs2_xattr_bucket *bucket, 290 void *para); 291 static int ocfs2_rm_xattr_cluster(struct inode *inode, 292 struct buffer_head *root_bh, 293 u64 blkno, 294 u32 cpos, 295 u32 len, 296 void *para); 297 298 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 299 u64 src_blk, u64 last_blk, u64 to_blk, 300 unsigned int start_bucket, 301 u32 *first_hash); 302 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 303 struct ocfs2_dinode *di, 304 struct ocfs2_xattr_info *xi, 305 struct ocfs2_xattr_search *xis, 306 struct ocfs2_xattr_search *xbs, 307 struct ocfs2_refcount_tree **ref_tree, 308 int *meta_need, 309 int *credits); 310 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 311 struct ocfs2_xattr_bucket *bucket, 312 int offset, 313 struct ocfs2_xattr_value_root **xv, 314 struct buffer_head **bh); 315 316 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 317 { 318 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 319 } 320 321 static inline u16 
ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 322 { 323 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 324 } 325 326 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 327 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 328 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 329 330 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 331 { 332 struct ocfs2_xattr_bucket *bucket; 333 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 334 335 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 336 337 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); 338 if (bucket) { 339 bucket->bu_inode = inode; 340 bucket->bu_blocks = blks; 341 } 342 343 return bucket; 344 } 345 346 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 347 { 348 int i; 349 350 for (i = 0; i < bucket->bu_blocks; i++) { 351 brelse(bucket->bu_bhs[i]); 352 bucket->bu_bhs[i] = NULL; 353 } 354 } 355 356 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 357 { 358 if (bucket) { 359 ocfs2_xattr_bucket_relse(bucket); 360 bucket->bu_inode = NULL; 361 kfree(bucket); 362 } 363 } 364 365 /* 366 * A bucket that has never been written to disk doesn't need to be 367 * read. We just need the buffer_heads. Don't call this for 368 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 369 * them fully. 
370 */ 371 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 372 u64 xb_blkno) 373 { 374 int i, rc = 0; 375 376 for (i = 0; i < bucket->bu_blocks; i++) { 377 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 378 xb_blkno + i); 379 if (!bucket->bu_bhs[i]) { 380 rc = -EIO; 381 mlog_errno(rc); 382 break; 383 } 384 385 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 386 bucket->bu_bhs[i])) 387 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 388 bucket->bu_bhs[i]); 389 } 390 391 if (rc) 392 ocfs2_xattr_bucket_relse(bucket); 393 return rc; 394 } 395 396 /* Read the xattr bucket at xb_blkno */ 397 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 398 u64 xb_blkno) 399 { 400 int rc; 401 402 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 403 bucket->bu_blocks, bucket->bu_bhs, 0, 404 NULL); 405 if (!rc) { 406 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 407 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 408 bucket->bu_bhs, 409 bucket->bu_blocks, 410 &bucket_xh(bucket)->xh_check); 411 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 412 if (rc) 413 mlog_errno(rc); 414 } 415 416 if (rc) 417 ocfs2_xattr_bucket_relse(bucket); 418 return rc; 419 } 420 421 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 422 struct ocfs2_xattr_bucket *bucket, 423 int type) 424 { 425 int i, rc = 0; 426 427 for (i = 0; i < bucket->bu_blocks; i++) { 428 rc = ocfs2_journal_access(handle, 429 INODE_CACHE(bucket->bu_inode), 430 bucket->bu_bhs[i], type); 431 if (rc) { 432 mlog_errno(rc); 433 break; 434 } 435 } 436 437 return rc; 438 } 439 440 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 441 struct ocfs2_xattr_bucket *bucket) 442 { 443 int i; 444 445 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 446 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 447 bucket->bu_bhs, bucket->bu_blocks, 448 &bucket_xh(bucket)->xh_check); 449 
spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 450 451 for (i = 0; i < bucket->bu_blocks; i++) 452 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 453 } 454 455 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 456 struct ocfs2_xattr_bucket *src) 457 { 458 int i; 459 int blocksize = src->bu_inode->i_sb->s_blocksize; 460 461 BUG_ON(dest->bu_blocks != src->bu_blocks); 462 BUG_ON(dest->bu_inode != src->bu_inode); 463 464 for (i = 0; i < src->bu_blocks; i++) { 465 memcpy(bucket_block(dest, i), bucket_block(src, i), 466 blocksize); 467 } 468 } 469 470 static int ocfs2_validate_xattr_block(struct super_block *sb, 471 struct buffer_head *bh) 472 { 473 int rc; 474 struct ocfs2_xattr_block *xb = 475 (struct ocfs2_xattr_block *)bh->b_data; 476 477 mlog(0, "Validating xattr block %llu\n", 478 (unsigned long long)bh->b_blocknr); 479 480 BUG_ON(!buffer_uptodate(bh)); 481 482 /* 483 * If the ecc fails, we return the error but otherwise 484 * leave the filesystem running. We know any error is 485 * local to this block. 
486 */ 487 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 488 if (rc) 489 return rc; 490 491 /* 492 * Errors after here are fatal 493 */ 494 495 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 496 ocfs2_error(sb, 497 "Extended attribute block #%llu has bad " 498 "signature %.*s", 499 (unsigned long long)bh->b_blocknr, 7, 500 xb->xb_signature); 501 return -EINVAL; 502 } 503 504 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 505 ocfs2_error(sb, 506 "Extended attribute block #%llu has an " 507 "invalid xb_blkno of %llu", 508 (unsigned long long)bh->b_blocknr, 509 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 510 return -EINVAL; 511 } 512 513 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 514 ocfs2_error(sb, 515 "Extended attribute block #%llu has an invalid " 516 "xb_fs_generation of #%u", 517 (unsigned long long)bh->b_blocknr, 518 le32_to_cpu(xb->xb_fs_generation)); 519 return -EINVAL; 520 } 521 522 return 0; 523 } 524 525 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 526 struct buffer_head **bh) 527 { 528 int rc; 529 struct buffer_head *tmp = *bh; 530 531 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 532 ocfs2_validate_xattr_block); 533 534 /* If ocfs2_read_block() got us a new bh, pass it up. */ 535 if (!rc && !*bh) 536 *bh = tmp; 537 538 return rc; 539 } 540 541 static inline const char *ocfs2_xattr_prefix(int name_index) 542 { 543 const struct xattr_handler *handler = NULL; 544 545 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 546 handler = ocfs2_xattr_handler_map[name_index]; 547 548 return handler ? 
handler->prefix : NULL; 549 } 550 551 static u32 ocfs2_xattr_name_hash(struct inode *inode, 552 const char *name, 553 int name_len) 554 { 555 /* Get hash value of uuid from super block */ 556 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 557 int i; 558 559 /* hash extended attribute name */ 560 for (i = 0; i < name_len; i++) { 561 hash = (hash << OCFS2_HASH_SHIFT) ^ 562 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 563 *name++; 564 } 565 566 return hash; 567 } 568 569 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 570 { 571 return namevalue_size(name_len, value_len) + 572 sizeof(struct ocfs2_xattr_entry); 573 } 574 575 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 576 { 577 return namevalue_size_xi(xi) + 578 sizeof(struct ocfs2_xattr_entry); 579 } 580 581 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 582 { 583 return namevalue_size_xe(xe) + 584 sizeof(struct ocfs2_xattr_entry); 585 } 586 587 int ocfs2_calc_security_init(struct inode *dir, 588 struct ocfs2_security_xattr_info *si, 589 int *want_clusters, 590 int *xattr_credits, 591 struct ocfs2_alloc_context **xattr_ac) 592 { 593 int ret = 0; 594 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 595 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 596 si->value_len); 597 598 /* 599 * The max space of security xattr taken inline is 600 * 256(name) + 80(value) + 16(entry) = 352 bytes, 601 * So reserve one metadata block for it is ok. 
602 */ 603 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 604 s_size > OCFS2_XATTR_FREE_IN_IBODY) { 605 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 606 if (ret) { 607 mlog_errno(ret); 608 return ret; 609 } 610 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 611 } 612 613 /* reserve clusters for xattr value which will be set in B tree*/ 614 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { 615 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 616 si->value_len); 617 618 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 619 new_clusters); 620 *want_clusters += new_clusters; 621 } 622 return ret; 623 } 624 625 int ocfs2_calc_xattr_init(struct inode *dir, 626 struct buffer_head *dir_bh, 627 int mode, 628 struct ocfs2_security_xattr_info *si, 629 int *want_clusters, 630 int *xattr_credits, 631 int *want_meta) 632 { 633 int ret = 0; 634 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 635 int s_size = 0, a_size = 0, acl_len = 0, new_clusters; 636 637 if (si->enable) 638 s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 639 si->value_len); 640 641 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 642 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 643 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 644 "", NULL, 0); 645 if (acl_len > 0) { 646 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 647 if (S_ISDIR(mode)) 648 a_size <<= 1; 649 } else if (acl_len != 0 && acl_len != -ENODATA) { 650 mlog_errno(ret); 651 return ret; 652 } 653 } 654 655 if (!(s_size + a_size)) 656 return ret; 657 658 /* 659 * The max space of security xattr taken inline is 660 * 256(name) + 80(value) + 16(entry) = 352 bytes, 661 * The max space of acl xattr taken inline is 662 * 80(value) + 16(entry) * 2(if directory) = 192 bytes, 663 * when blocksize = 512, may reserve one more cluser for 664 * xattr bucket, otherwise reserve one metadata block 665 * for them is ok. 
666 * If this is a new directory with inline data, 667 * we choose to reserve the entire inline area for 668 * directory contents and force an external xattr block. 669 */ 670 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 671 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || 672 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 673 *want_meta = *want_meta + 1; 674 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 675 } 676 677 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && 678 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { 679 *want_clusters += 1; 680 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); 681 } 682 683 /* 684 * reserve credits and clusters for xattrs which has large value 685 * and have to be set outside 686 */ 687 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { 688 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 689 si->value_len); 690 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 691 new_clusters); 692 *want_clusters += new_clusters; 693 } 694 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && 695 acl_len > OCFS2_XATTR_INLINE_SIZE) { 696 /* for directory, it has DEFAULT and ACCESS two types of acls */ 697 new_clusters = (S_ISDIR(mode) ? 
2 : 1) * 698 ocfs2_clusters_for_bytes(dir->i_sb, acl_len); 699 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 700 new_clusters); 701 *want_clusters += new_clusters; 702 } 703 704 return ret; 705 } 706 707 static int ocfs2_xattr_extend_allocation(struct inode *inode, 708 u32 clusters_to_add, 709 struct ocfs2_xattr_value_buf *vb, 710 struct ocfs2_xattr_set_ctxt *ctxt) 711 { 712 int status = 0, credits; 713 handle_t *handle = ctxt->handle; 714 enum ocfs2_alloc_restarted why; 715 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 716 struct ocfs2_extent_tree et; 717 718 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add); 719 720 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 721 722 while (clusters_to_add) { 723 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 724 OCFS2_JOURNAL_ACCESS_WRITE); 725 if (status < 0) { 726 mlog_errno(status); 727 break; 728 } 729 730 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 731 status = ocfs2_add_clusters_in_btree(handle, 732 &et, 733 &logical_start, 734 clusters_to_add, 735 0, 736 ctxt->data_ac, 737 ctxt->meta_ac, 738 &why); 739 if ((status < 0) && (status != -EAGAIN)) { 740 if (status != -ENOSPC) 741 mlog_errno(status); 742 break; 743 } 744 745 ocfs2_journal_dirty(handle, vb->vb_bh); 746 747 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - 748 prev_clusters; 749 750 if (why != RESTART_NONE && clusters_to_add) { 751 /* 752 * We can only fail in case the alloc file doesn't give 753 * up enough clusters. 
754 */ 755 BUG_ON(why == RESTART_META); 756 757 mlog(0, "restarting xattr value extension for %u" 758 " clusters,.\n", clusters_to_add); 759 credits = ocfs2_calc_extend_credits(inode->i_sb, 760 &vb->vb_xv->xr_list, 761 clusters_to_add); 762 status = ocfs2_extend_trans(handle, credits); 763 if (status < 0) { 764 status = -ENOMEM; 765 mlog_errno(status); 766 break; 767 } 768 } 769 } 770 771 return status; 772 } 773 774 static int __ocfs2_remove_xattr_range(struct inode *inode, 775 struct ocfs2_xattr_value_buf *vb, 776 u32 cpos, u32 phys_cpos, u32 len, 777 unsigned int ext_flags, 778 struct ocfs2_xattr_set_ctxt *ctxt) 779 { 780 int ret; 781 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 782 handle_t *handle = ctxt->handle; 783 struct ocfs2_extent_tree et; 784 785 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 786 787 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 788 OCFS2_JOURNAL_ACCESS_WRITE); 789 if (ret) { 790 mlog_errno(ret); 791 goto out; 792 } 793 794 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, 795 &ctxt->dealloc); 796 if (ret) { 797 mlog_errno(ret); 798 goto out; 799 } 800 801 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 802 ocfs2_journal_dirty(handle, vb->vb_bh); 803 804 if (ext_flags & OCFS2_EXT_REFCOUNTED) 805 ret = ocfs2_decrease_refcount(inode, handle, 806 ocfs2_blocks_to_clusters(inode->i_sb, 807 phys_blkno), 808 len, ctxt->meta_ac, &ctxt->dealloc, 1); 809 else 810 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, 811 phys_blkno, len); 812 if (ret) 813 mlog_errno(ret); 814 815 out: 816 return ret; 817 } 818 819 static int ocfs2_xattr_shrink_size(struct inode *inode, 820 u32 old_clusters, 821 u32 new_clusters, 822 struct ocfs2_xattr_value_buf *vb, 823 struct ocfs2_xattr_set_ctxt *ctxt) 824 { 825 int ret = 0; 826 unsigned int ext_flags; 827 u32 trunc_len, cpos, phys_cpos, alloc_size; 828 u64 block; 829 830 if (old_clusters <= new_clusters) 831 return 0; 832 833 cpos = new_clusters; 834 
trunc_len = old_clusters - new_clusters; 835 while (trunc_len) { 836 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, 837 &alloc_size, 838 &vb->vb_xv->xr_list, &ext_flags); 839 if (ret) { 840 mlog_errno(ret); 841 goto out; 842 } 843 844 if (alloc_size > trunc_len) 845 alloc_size = trunc_len; 846 847 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 848 phys_cpos, alloc_size, 849 ext_flags, ctxt); 850 if (ret) { 851 mlog_errno(ret); 852 goto out; 853 } 854 855 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 856 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 857 block, alloc_size); 858 cpos += alloc_size; 859 trunc_len -= alloc_size; 860 } 861 862 out: 863 return ret; 864 } 865 866 static int ocfs2_xattr_value_truncate(struct inode *inode, 867 struct ocfs2_xattr_value_buf *vb, 868 int len, 869 struct ocfs2_xattr_set_ctxt *ctxt) 870 { 871 int ret; 872 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 873 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 874 875 if (new_clusters == old_clusters) 876 return 0; 877 878 if (new_clusters > old_clusters) 879 ret = ocfs2_xattr_extend_allocation(inode, 880 new_clusters - old_clusters, 881 vb, ctxt); 882 else 883 ret = ocfs2_xattr_shrink_size(inode, 884 old_clusters, new_clusters, 885 vb, ctxt); 886 887 return ret; 888 } 889 890 static int ocfs2_xattr_list_entry(char *buffer, size_t size, 891 size_t *result, const char *prefix, 892 const char *name, int name_len) 893 { 894 char *p = buffer + *result; 895 int prefix_len = strlen(prefix); 896 int total_len = prefix_len + name_len + 1; 897 898 *result += total_len; 899 900 /* we are just looking for how big our buffer needs to be */ 901 if (!size) 902 return 0; 903 904 if (*result > size) 905 return -ERANGE; 906 907 memcpy(p, prefix, prefix_len); 908 memcpy(p + prefix_len, name, name_len); 909 p[prefix_len + name_len] = '\0'; 910 911 return 0; 912 } 913 914 static int ocfs2_xattr_list_entries(struct inode *inode, 915 struct 
ocfs2_xattr_header *header, 916 char *buffer, size_t buffer_size) 917 { 918 size_t result = 0; 919 int i, type, ret; 920 const char *prefix, *name; 921 922 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 923 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 924 type = ocfs2_xattr_get_type(entry); 925 prefix = ocfs2_xattr_prefix(type); 926 927 if (prefix) { 928 name = (const char *)header + 929 le16_to_cpu(entry->xe_name_offset); 930 931 ret = ocfs2_xattr_list_entry(buffer, buffer_size, 932 &result, prefix, name, 933 entry->xe_name_len); 934 if (ret) 935 return ret; 936 } 937 } 938 939 return result; 940 } 941 942 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 943 struct ocfs2_dinode *di) 944 { 945 struct ocfs2_xattr_header *xh; 946 int i; 947 948 xh = (struct ocfs2_xattr_header *) 949 ((void *)di + inode->i_sb->s_blocksize - 950 le16_to_cpu(di->i_xattr_inline_size)); 951 952 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 953 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 954 return 1; 955 956 return 0; 957 } 958 959 static int ocfs2_xattr_ibody_list(struct inode *inode, 960 struct ocfs2_dinode *di, 961 char *buffer, 962 size_t buffer_size) 963 { 964 struct ocfs2_xattr_header *header = NULL; 965 struct ocfs2_inode_info *oi = OCFS2_I(inode); 966 int ret = 0; 967 968 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 969 return ret; 970 971 header = (struct ocfs2_xattr_header *) 972 ((void *)di + inode->i_sb->s_blocksize - 973 le16_to_cpu(di->i_xattr_inline_size)); 974 975 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 976 977 return ret; 978 } 979 980 static int ocfs2_xattr_block_list(struct inode *inode, 981 struct ocfs2_dinode *di, 982 char *buffer, 983 size_t buffer_size) 984 { 985 struct buffer_head *blk_bh = NULL; 986 struct ocfs2_xattr_block *xb; 987 int ret = 0; 988 989 if (!di->i_xattr_loc) 990 return ret; 991 992 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 993 &blk_bh); 994 if (ret < 
0) { 995 mlog_errno(ret); 996 return ret; 997 } 998 999 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1000 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1001 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 1002 ret = ocfs2_xattr_list_entries(inode, header, 1003 buffer, buffer_size); 1004 } else 1005 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1006 buffer, buffer_size); 1007 1008 brelse(blk_bh); 1009 1010 return ret; 1011 } 1012 1013 ssize_t ocfs2_listxattr(struct dentry *dentry, 1014 char *buffer, 1015 size_t size) 1016 { 1017 int ret = 0, i_ret = 0, b_ret = 0; 1018 struct buffer_head *di_bh = NULL; 1019 struct ocfs2_dinode *di = NULL; 1020 struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode); 1021 1022 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) 1023 return -EOPNOTSUPP; 1024 1025 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1026 return ret; 1027 1028 ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0); 1029 if (ret < 0) { 1030 mlog_errno(ret); 1031 return ret; 1032 } 1033 1034 di = (struct ocfs2_dinode *)di_bh->b_data; 1035 1036 down_read(&oi->ip_xattr_sem); 1037 i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size); 1038 if (i_ret < 0) 1039 b_ret = 0; 1040 else { 1041 if (buffer) { 1042 buffer += i_ret; 1043 size -= i_ret; 1044 } 1045 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di, 1046 buffer, size); 1047 if (b_ret < 0) 1048 i_ret = 0; 1049 } 1050 up_read(&oi->ip_xattr_sem); 1051 ocfs2_inode_unlock(dentry->d_inode, 0); 1052 1053 brelse(di_bh); 1054 1055 return i_ret + b_ret; 1056 } 1057 1058 static int ocfs2_xattr_find_entry(int name_index, 1059 const char *name, 1060 struct ocfs2_xattr_search *xs) 1061 { 1062 struct ocfs2_xattr_entry *entry; 1063 size_t name_len; 1064 int i, cmp = 1; 1065 1066 if (name == NULL) 1067 return -EINVAL; 1068 1069 name_len = strlen(name); 1070 entry = xs->here; 1071 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 1072 cmp = name_index - 
ocfs2_xattr_get_type(entry); 1073 if (!cmp) 1074 cmp = name_len - entry->xe_name_len; 1075 if (!cmp) 1076 cmp = memcmp(name, (xs->base + 1077 le16_to_cpu(entry->xe_name_offset)), 1078 name_len); 1079 if (cmp == 0) 1080 break; 1081 entry += 1; 1082 } 1083 xs->here = entry; 1084 1085 return cmp ? -ENODATA : 0; 1086 } 1087 1088 static int ocfs2_xattr_get_value_outside(struct inode *inode, 1089 struct ocfs2_xattr_value_root *xv, 1090 void *buffer, 1091 size_t len) 1092 { 1093 u32 cpos, p_cluster, num_clusters, bpc, clusters; 1094 u64 blkno; 1095 int i, ret = 0; 1096 size_t cplen, blocksize; 1097 struct buffer_head *bh = NULL; 1098 struct ocfs2_extent_list *el; 1099 1100 el = &xv->xr_list; 1101 clusters = le32_to_cpu(xv->xr_clusters); 1102 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1103 blocksize = inode->i_sb->s_blocksize; 1104 1105 cpos = 0; 1106 while (cpos < clusters) { 1107 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1108 &num_clusters, el, NULL); 1109 if (ret) { 1110 mlog_errno(ret); 1111 goto out; 1112 } 1113 1114 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1115 /* Copy ocfs2_xattr_value */ 1116 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1117 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1118 &bh, NULL); 1119 if (ret) { 1120 mlog_errno(ret); 1121 goto out; 1122 } 1123 1124 cplen = len >= blocksize ? 
blocksize : len; 1125 memcpy(buffer, bh->b_data, cplen); 1126 len -= cplen; 1127 buffer += cplen; 1128 1129 brelse(bh); 1130 bh = NULL; 1131 if (len == 0) 1132 break; 1133 } 1134 cpos += num_clusters; 1135 } 1136 out: 1137 return ret; 1138 } 1139 1140 static int ocfs2_xattr_ibody_get(struct inode *inode, 1141 int name_index, 1142 const char *name, 1143 void *buffer, 1144 size_t buffer_size, 1145 struct ocfs2_xattr_search *xs) 1146 { 1147 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1148 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1149 struct ocfs2_xattr_value_root *xv; 1150 size_t size; 1151 int ret = 0; 1152 1153 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 1154 return -ENODATA; 1155 1156 xs->end = (void *)di + inode->i_sb->s_blocksize; 1157 xs->header = (struct ocfs2_xattr_header *) 1158 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 1159 xs->base = (void *)xs->header; 1160 xs->here = xs->header->xh_entries; 1161 1162 ret = ocfs2_xattr_find_entry(name_index, name, xs); 1163 if (ret) 1164 return ret; 1165 size = le64_to_cpu(xs->here->xe_value_size); 1166 if (buffer) { 1167 if (size > buffer_size) 1168 return -ERANGE; 1169 if (ocfs2_xattr_is_local(xs->here)) { 1170 memcpy(buffer, (void *)xs->base + 1171 le16_to_cpu(xs->here->xe_name_offset) + 1172 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); 1173 } else { 1174 xv = (struct ocfs2_xattr_value_root *) 1175 (xs->base + le16_to_cpu( 1176 xs->here->xe_name_offset) + 1177 OCFS2_XATTR_SIZE(xs->here->xe_name_len)); 1178 ret = ocfs2_xattr_get_value_outside(inode, xv, 1179 buffer, size); 1180 if (ret < 0) { 1181 mlog_errno(ret); 1182 return ret; 1183 } 1184 } 1185 } 1186 1187 return size; 1188 } 1189 1190 static int ocfs2_xattr_block_get(struct inode *inode, 1191 int name_index, 1192 const char *name, 1193 void *buffer, 1194 size_t buffer_size, 1195 struct ocfs2_xattr_search *xs) 1196 { 1197 struct ocfs2_xattr_block *xb; 1198 struct ocfs2_xattr_value_root *xv; 1199 size_t size; 1200 
int ret = -ENODATA, name_offset, name_len, i; 1201 int uninitialized_var(block_off); 1202 1203 xs->bucket = ocfs2_xattr_bucket_new(inode); 1204 if (!xs->bucket) { 1205 ret = -ENOMEM; 1206 mlog_errno(ret); 1207 goto cleanup; 1208 } 1209 1210 ret = ocfs2_xattr_block_find(inode, name_index, name, xs); 1211 if (ret) { 1212 mlog_errno(ret); 1213 goto cleanup; 1214 } 1215 1216 if (xs->not_found) { 1217 ret = -ENODATA; 1218 goto cleanup; 1219 } 1220 1221 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 1222 size = le64_to_cpu(xs->here->xe_value_size); 1223 if (buffer) { 1224 ret = -ERANGE; 1225 if (size > buffer_size) 1226 goto cleanup; 1227 1228 name_offset = le16_to_cpu(xs->here->xe_name_offset); 1229 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); 1230 i = xs->here - xs->header->xh_entries; 1231 1232 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1233 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 1234 bucket_xh(xs->bucket), 1235 i, 1236 &block_off, 1237 &name_offset); 1238 xs->base = bucket_block(xs->bucket, block_off); 1239 } 1240 if (ocfs2_xattr_is_local(xs->here)) { 1241 memcpy(buffer, (void *)xs->base + 1242 name_offset + name_len, size); 1243 } else { 1244 xv = (struct ocfs2_xattr_value_root *) 1245 (xs->base + name_offset + name_len); 1246 ret = ocfs2_xattr_get_value_outside(inode, xv, 1247 buffer, size); 1248 if (ret < 0) { 1249 mlog_errno(ret); 1250 goto cleanup; 1251 } 1252 } 1253 } 1254 ret = size; 1255 cleanup: 1256 ocfs2_xattr_bucket_free(xs->bucket); 1257 1258 brelse(xs->xattr_bh); 1259 xs->xattr_bh = NULL; 1260 return ret; 1261 } 1262 1263 int ocfs2_xattr_get_nolock(struct inode *inode, 1264 struct buffer_head *di_bh, 1265 int name_index, 1266 const char *name, 1267 void *buffer, 1268 size_t buffer_size) 1269 { 1270 int ret; 1271 struct ocfs2_dinode *di = NULL; 1272 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1273 struct ocfs2_xattr_search xis = { 1274 .not_found = -ENODATA, 1275 }; 1276 struct ocfs2_xattr_search xbs = { 1277 
.not_found = -ENODATA, 1278 }; 1279 1280 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 1281 return -EOPNOTSUPP; 1282 1283 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1284 ret = -ENODATA; 1285 1286 xis.inode_bh = xbs.inode_bh = di_bh; 1287 di = (struct ocfs2_dinode *)di_bh->b_data; 1288 1289 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 1290 buffer_size, &xis); 1291 if (ret == -ENODATA && di->i_xattr_loc) 1292 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1293 buffer_size, &xbs); 1294 1295 return ret; 1296 } 1297 1298 /* ocfs2_xattr_get() 1299 * 1300 * Copy an extended attribute into the buffer provided. 1301 * Buffer is NULL to compute the size of buffer required. 1302 */ 1303 static int ocfs2_xattr_get(struct inode *inode, 1304 int name_index, 1305 const char *name, 1306 void *buffer, 1307 size_t buffer_size) 1308 { 1309 int ret; 1310 struct buffer_head *di_bh = NULL; 1311 1312 ret = ocfs2_inode_lock(inode, &di_bh, 0); 1313 if (ret < 0) { 1314 mlog_errno(ret); 1315 return ret; 1316 } 1317 down_read(&OCFS2_I(inode)->ip_xattr_sem); 1318 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, 1319 name, buffer, buffer_size); 1320 up_read(&OCFS2_I(inode)->ip_xattr_sem); 1321 1322 ocfs2_inode_unlock(inode, 0); 1323 1324 brelse(di_bh); 1325 1326 return ret; 1327 } 1328 1329 static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1330 handle_t *handle, 1331 struct ocfs2_xattr_value_buf *vb, 1332 const void *value, 1333 int value_len) 1334 { 1335 int ret = 0, i, cp_len; 1336 u16 blocksize = inode->i_sb->s_blocksize; 1337 u32 p_cluster, num_clusters; 1338 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1339 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1340 u64 blkno; 1341 struct buffer_head *bh = NULL; 1342 unsigned int ext_flags; 1343 struct ocfs2_xattr_value_root *xv = vb->vb_xv; 1344 1345 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); 1346 1347 while (cpos < clusters) { 1348 ret = 
ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1349 &num_clusters, &xv->xr_list, 1350 &ext_flags); 1351 if (ret) { 1352 mlog_errno(ret); 1353 goto out; 1354 } 1355 1356 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 1357 1358 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1359 1360 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1361 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1362 &bh, NULL); 1363 if (ret) { 1364 mlog_errno(ret); 1365 goto out; 1366 } 1367 1368 ret = ocfs2_journal_access(handle, 1369 INODE_CACHE(inode), 1370 bh, 1371 OCFS2_JOURNAL_ACCESS_WRITE); 1372 if (ret < 0) { 1373 mlog_errno(ret); 1374 goto out; 1375 } 1376 1377 cp_len = value_len > blocksize ? blocksize : value_len; 1378 memcpy(bh->b_data, value, cp_len); 1379 value_len -= cp_len; 1380 value += cp_len; 1381 if (cp_len < blocksize) 1382 memset(bh->b_data + cp_len, 0, 1383 blocksize - cp_len); 1384 1385 ocfs2_journal_dirty(handle, bh); 1386 brelse(bh); 1387 bh = NULL; 1388 1389 /* 1390 * XXX: do we need to empty all the following 1391 * blocks in this cluster? 
1392 */ 1393 if (!value_len) 1394 break; 1395 } 1396 cpos += num_clusters; 1397 } 1398 out: 1399 brelse(bh); 1400 1401 return ret; 1402 } 1403 1404 static int ocfs2_xa_check_space_helper(int needed_space, int free_start, 1405 int num_entries) 1406 { 1407 int free_space; 1408 1409 if (!needed_space) 1410 return 0; 1411 1412 free_space = free_start - 1413 sizeof(struct ocfs2_xattr_header) - 1414 (num_entries * sizeof(struct ocfs2_xattr_entry)) - 1415 OCFS2_XATTR_HEADER_GAP; 1416 if (free_space < 0) 1417 return -EIO; 1418 if (free_space < needed_space) 1419 return -ENOSPC; 1420 1421 return 0; 1422 } 1423 1424 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, 1425 int type) 1426 { 1427 return loc->xl_ops->xlo_journal_access(handle, loc, type); 1428 } 1429 1430 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) 1431 { 1432 loc->xl_ops->xlo_journal_dirty(handle, loc); 1433 } 1434 1435 /* Give a pointer into the storage for the given offset */ 1436 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) 1437 { 1438 BUG_ON(offset >= loc->xl_size); 1439 return loc->xl_ops->xlo_offset_pointer(loc, offset); 1440 } 1441 1442 /* 1443 * Wipe the name+value pair and allow the storage to reclaim it. This 1444 * must be followed by either removal of the entry or a call to 1445 * ocfs2_xa_add_namevalue(). 1446 */ 1447 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) 1448 { 1449 loc->xl_ops->xlo_wipe_namevalue(loc); 1450 } 1451 1452 /* 1453 * Find lowest offset to a name+value pair. This is the start of our 1454 * downward-growing free space. 1455 */ 1456 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) 1457 { 1458 return loc->xl_ops->xlo_get_free_start(loc); 1459 } 1460 1461 /* Can we reuse loc->xl_entry for xi? 
*/ 1462 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, 1463 struct ocfs2_xattr_info *xi) 1464 { 1465 return loc->xl_ops->xlo_can_reuse(loc, xi); 1466 } 1467 1468 /* How much free space is needed to set the new value */ 1469 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, 1470 struct ocfs2_xattr_info *xi) 1471 { 1472 return loc->xl_ops->xlo_check_space(loc, xi); 1473 } 1474 1475 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1476 { 1477 loc->xl_ops->xlo_add_entry(loc, name_hash); 1478 loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); 1479 /* 1480 * We can't leave the new entry's xe_name_offset at zero or 1481 * add_namevalue() will go nuts. We set it to the size of our 1482 * storage so that it can never be less than any other entry. 1483 */ 1484 loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); 1485 } 1486 1487 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, 1488 struct ocfs2_xattr_info *xi) 1489 { 1490 int size = namevalue_size_xi(xi); 1491 int nameval_offset; 1492 char *nameval_buf; 1493 1494 loc->xl_ops->xlo_add_namevalue(loc, size); 1495 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1496 loc->xl_entry->xe_name_len = xi->xi_name_len; 1497 ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); 1498 ocfs2_xattr_set_local(loc->xl_entry, 1499 xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); 1500 1501 nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1502 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 1503 memset(nameval_buf, 0, size); 1504 memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); 1505 } 1506 1507 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, 1508 struct ocfs2_xattr_value_buf *vb) 1509 { 1510 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1511 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1512 1513 /* Value bufs are for value trees */ 1514 BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); 1515 
BUG_ON(namevalue_size_xe(loc->xl_entry) != 1516 (name_size + OCFS2_XATTR_ROOT_SIZE)); 1517 1518 loc->xl_ops->xlo_fill_value_buf(loc, vb); 1519 vb->vb_xv = 1520 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, 1521 nameval_offset + 1522 name_size); 1523 } 1524 1525 static int ocfs2_xa_block_journal_access(handle_t *handle, 1526 struct ocfs2_xa_loc *loc, int type) 1527 { 1528 struct buffer_head *bh = loc->xl_storage; 1529 ocfs2_journal_access_func access; 1530 1531 if (loc->xl_size == (bh->b_size - 1532 offsetof(struct ocfs2_xattr_block, 1533 xb_attrs.xb_header))) 1534 access = ocfs2_journal_access_xb; 1535 else 1536 access = ocfs2_journal_access_di; 1537 return access(handle, INODE_CACHE(loc->xl_inode), bh, type); 1538 } 1539 1540 static void ocfs2_xa_block_journal_dirty(handle_t *handle, 1541 struct ocfs2_xa_loc *loc) 1542 { 1543 struct buffer_head *bh = loc->xl_storage; 1544 1545 ocfs2_journal_dirty(handle, bh); 1546 } 1547 1548 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, 1549 int offset) 1550 { 1551 return (char *)loc->xl_header + offset; 1552 } 1553 1554 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, 1555 struct ocfs2_xattr_info *xi) 1556 { 1557 /* 1558 * Block storage is strict. If the sizes aren't exact, we will 1559 * remove the old one and reinsert the new. 
1560 */ 1561 return namevalue_size_xe(loc->xl_entry) == 1562 namevalue_size_xi(xi); 1563 } 1564 1565 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) 1566 { 1567 struct ocfs2_xattr_header *xh = loc->xl_header; 1568 int i, count = le16_to_cpu(xh->xh_count); 1569 int offset, free_start = loc->xl_size; 1570 1571 for (i = 0; i < count; i++) { 1572 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1573 if (offset < free_start) 1574 free_start = offset; 1575 } 1576 1577 return free_start; 1578 } 1579 1580 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, 1581 struct ocfs2_xattr_info *xi) 1582 { 1583 int count = le16_to_cpu(loc->xl_header->xh_count); 1584 int free_start = ocfs2_xa_get_free_start(loc); 1585 int needed_space = ocfs2_xi_entry_usage(xi); 1586 1587 /* 1588 * Block storage will reclaim the original entry before inserting 1589 * the new value, so we only need the difference. If the new 1590 * entry is smaller than the old one, we don't need anything. 1591 */ 1592 if (loc->xl_entry) { 1593 /* Don't need space if we're reusing! */ 1594 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1595 needed_space = 0; 1596 else 1597 needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); 1598 } 1599 if (needed_space < 0) 1600 needed_space = 0; 1601 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1602 } 1603 1604 /* 1605 * Block storage for xattrs keeps the name+value pairs compacted. When 1606 * we remove one, we have to shift any that preceded it towards the end. 
1607 */ 1608 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) 1609 { 1610 int i, offset; 1611 int namevalue_offset, first_namevalue_offset, namevalue_size; 1612 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1613 struct ocfs2_xattr_header *xh = loc->xl_header; 1614 int count = le16_to_cpu(xh->xh_count); 1615 1616 namevalue_offset = le16_to_cpu(entry->xe_name_offset); 1617 namevalue_size = namevalue_size_xe(entry); 1618 first_namevalue_offset = ocfs2_xa_get_free_start(loc); 1619 1620 /* Shift the name+value pairs */ 1621 memmove((char *)xh + first_namevalue_offset + namevalue_size, 1622 (char *)xh + first_namevalue_offset, 1623 namevalue_offset - first_namevalue_offset); 1624 memset((char *)xh + first_namevalue_offset, 0, namevalue_size); 1625 1626 /* Now tell xh->xh_entries about it */ 1627 for (i = 0; i < count; i++) { 1628 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1629 if (offset <= namevalue_offset) 1630 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1631 namevalue_size); 1632 } 1633 1634 /* 1635 * Note that we don't update xh_free_start or xh_name_value_len 1636 * because they're not used in block-stored xattrs. 
1637 */ 1638 } 1639 1640 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1641 { 1642 int count = le16_to_cpu(loc->xl_header->xh_count); 1643 loc->xl_entry = &(loc->xl_header->xh_entries[count]); 1644 le16_add_cpu(&loc->xl_header->xh_count, 1); 1645 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1646 } 1647 1648 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1649 { 1650 int free_start = ocfs2_xa_get_free_start(loc); 1651 1652 loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); 1653 } 1654 1655 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, 1656 struct ocfs2_xattr_value_buf *vb) 1657 { 1658 struct buffer_head *bh = loc->xl_storage; 1659 1660 if (loc->xl_size == (bh->b_size - 1661 offsetof(struct ocfs2_xattr_block, 1662 xb_attrs.xb_header))) 1663 vb->vb_access = ocfs2_journal_access_xb; 1664 else 1665 vb->vb_access = ocfs2_journal_access_di; 1666 vb->vb_bh = bh; 1667 } 1668 1669 /* 1670 * Operations for xattrs stored in blocks. This includes inline inode 1671 * storage and unindexed ocfs2_xattr_blocks. 
1672 */ 1673 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { 1674 .xlo_journal_access = ocfs2_xa_block_journal_access, 1675 .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, 1676 .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, 1677 .xlo_check_space = ocfs2_xa_block_check_space, 1678 .xlo_can_reuse = ocfs2_xa_block_can_reuse, 1679 .xlo_get_free_start = ocfs2_xa_block_get_free_start, 1680 .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, 1681 .xlo_add_entry = ocfs2_xa_block_add_entry, 1682 .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, 1683 .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, 1684 }; 1685 1686 static int ocfs2_xa_bucket_journal_access(handle_t *handle, 1687 struct ocfs2_xa_loc *loc, int type) 1688 { 1689 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1690 1691 return ocfs2_xattr_bucket_journal_access(handle, bucket, type); 1692 } 1693 1694 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, 1695 struct ocfs2_xa_loc *loc) 1696 { 1697 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1698 1699 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 1700 } 1701 1702 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, 1703 int offset) 1704 { 1705 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1706 int block, block_offset; 1707 1708 /* The header is at the front of the bucket */ 1709 block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; 1710 block_offset = offset % loc->xl_inode->i_sb->s_blocksize; 1711 1712 return bucket_block(bucket, block) + block_offset; 1713 } 1714 1715 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, 1716 struct ocfs2_xattr_info *xi) 1717 { 1718 return namevalue_size_xe(loc->xl_entry) >= 1719 namevalue_size_xi(xi); 1720 } 1721 1722 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) 1723 { 1724 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1725 return le16_to_cpu(bucket_xh(bucket)->xh_free_start); 1726 } 1727 1728 static 
int ocfs2_bucket_align_free_start(struct super_block *sb, 1729 int free_start, int size) 1730 { 1731 /* 1732 * We need to make sure that the name+value pair fits within 1733 * one block. 1734 */ 1735 if (((free_start - size) >> sb->s_blocksize_bits) != 1736 ((free_start - 1) >> sb->s_blocksize_bits)) 1737 free_start -= free_start % sb->s_blocksize; 1738 1739 return free_start; 1740 } 1741 1742 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, 1743 struct ocfs2_xattr_info *xi) 1744 { 1745 int rc; 1746 int count = le16_to_cpu(loc->xl_header->xh_count); 1747 int free_start = ocfs2_xa_get_free_start(loc); 1748 int needed_space = ocfs2_xi_entry_usage(xi); 1749 int size = namevalue_size_xi(xi); 1750 struct super_block *sb = loc->xl_inode->i_sb; 1751 1752 /* 1753 * Bucket storage does not reclaim name+value pairs it cannot 1754 * reuse. They live as holes until the bucket fills, and then 1755 * the bucket is defragmented. However, the bucket can reclaim 1756 * the ocfs2_xattr_entry. 1757 */ 1758 if (loc->xl_entry) { 1759 /* Don't need space if we're reusing! */ 1760 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1761 needed_space = 0; 1762 else 1763 needed_space -= sizeof(struct ocfs2_xattr_entry); 1764 } 1765 BUG_ON(needed_space < 0); 1766 1767 if (free_start < size) { 1768 if (needed_space) 1769 return -ENOSPC; 1770 } else { 1771 /* 1772 * First we check if it would fit in the first place. 1773 * Below, we align the free start to a block. This may 1774 * slide us below the minimum gap. By checking unaligned 1775 * first, we avoid that error. 
1776 */ 1777 rc = ocfs2_xa_check_space_helper(needed_space, free_start, 1778 count); 1779 if (rc) 1780 return rc; 1781 free_start = ocfs2_bucket_align_free_start(sb, free_start, 1782 size); 1783 } 1784 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1785 } 1786 1787 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) 1788 { 1789 le16_add_cpu(&loc->xl_header->xh_name_value_len, 1790 -namevalue_size_xe(loc->xl_entry)); 1791 } 1792 1793 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1794 { 1795 struct ocfs2_xattr_header *xh = loc->xl_header; 1796 int count = le16_to_cpu(xh->xh_count); 1797 int low = 0, high = count - 1, tmp; 1798 struct ocfs2_xattr_entry *tmp_xe; 1799 1800 /* 1801 * We keep buckets sorted by name_hash, so we need to find 1802 * our insert place. 1803 */ 1804 while (low <= high && count) { 1805 tmp = (low + high) / 2; 1806 tmp_xe = &xh->xh_entries[tmp]; 1807 1808 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 1809 low = tmp + 1; 1810 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) 1811 high = tmp - 1; 1812 else { 1813 low = tmp; 1814 break; 1815 } 1816 } 1817 1818 if (low != count) 1819 memmove(&xh->xh_entries[low + 1], 1820 &xh->xh_entries[low], 1821 ((count - low) * sizeof(struct ocfs2_xattr_entry))); 1822 1823 le16_add_cpu(&xh->xh_count, 1); 1824 loc->xl_entry = &xh->xh_entries[low]; 1825 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1826 } 1827 1828 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1829 { 1830 int free_start = ocfs2_xa_get_free_start(loc); 1831 struct ocfs2_xattr_header *xh = loc->xl_header; 1832 struct super_block *sb = loc->xl_inode->i_sb; 1833 int nameval_offset; 1834 1835 free_start = ocfs2_bucket_align_free_start(sb, free_start, size); 1836 nameval_offset = free_start - size; 1837 loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); 1838 xh->xh_free_start = cpu_to_le16(nameval_offset); 1839 
le16_add_cpu(&xh->xh_name_value_len, size); 1840 1841 } 1842 1843 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, 1844 struct ocfs2_xattr_value_buf *vb) 1845 { 1846 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1847 struct super_block *sb = loc->xl_inode->i_sb; 1848 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1849 int size = namevalue_size_xe(loc->xl_entry); 1850 int block_offset = nameval_offset >> sb->s_blocksize_bits; 1851 1852 /* Values are not allowed to straddle block boundaries */ 1853 BUG_ON(block_offset != 1854 ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); 1855 /* We expect the bucket to be filled in */ 1856 BUG_ON(!bucket->bu_bhs[block_offset]); 1857 1858 vb->vb_access = ocfs2_journal_access; 1859 vb->vb_bh = bucket->bu_bhs[block_offset]; 1860 } 1861 1862 /* Operations for xattrs stored in buckets. */ 1863 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { 1864 .xlo_journal_access = ocfs2_xa_bucket_journal_access, 1865 .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, 1866 .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, 1867 .xlo_check_space = ocfs2_xa_bucket_check_space, 1868 .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, 1869 .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, 1870 .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, 1871 .xlo_add_entry = ocfs2_xa_bucket_add_entry, 1872 .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, 1873 .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, 1874 }; 1875 1876 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) 1877 { 1878 struct ocfs2_xattr_value_buf vb; 1879 1880 if (ocfs2_xattr_is_local(loc->xl_entry)) 1881 return 0; 1882 1883 ocfs2_xa_fill_value_buf(loc, &vb); 1884 return le32_to_cpu(vb.vb_xv->xr_clusters); 1885 } 1886 1887 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, 1888 struct ocfs2_xattr_set_ctxt *ctxt) 1889 { 1890 int trunc_rc, access_rc; 1891 struct 
ocfs2_xattr_value_buf vb; 1892 1893 ocfs2_xa_fill_value_buf(loc, &vb); 1894 trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1895 ctxt); 1896 1897 /* 1898 * The caller of ocfs2_xa_value_truncate() has already called 1899 * ocfs2_xa_journal_access on the loc. However, The truncate code 1900 * calls ocfs2_extend_trans(). This may commit the previous 1901 * transaction and open a new one. If this is a bucket, truncate 1902 * could leave only vb->vb_bh set up for journaling. Meanwhile, 1903 * the caller is expecting to dirty the entire bucket. So we must 1904 * reset the journal work. We do this even if truncate has failed, 1905 * as it could have failed after committing the extend. 1906 */ 1907 access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1908 OCFS2_JOURNAL_ACCESS_WRITE); 1909 1910 /* Errors in truncate take precedence */ 1911 return trunc_rc ? trunc_rc : access_rc; 1912 } 1913 1914 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1915 { 1916 int index, count; 1917 struct ocfs2_xattr_header *xh = loc->xl_header; 1918 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1919 1920 ocfs2_xa_wipe_namevalue(loc); 1921 loc->xl_entry = NULL; 1922 1923 le16_add_cpu(&xh->xh_count, -1); 1924 count = le16_to_cpu(xh->xh_count); 1925 1926 /* 1927 * Only zero out the entry if there are more remaining. This is 1928 * important for an empty bucket, as it keeps track of the 1929 * bucket's hash value. It doesn't hurt empty block storage. 
1930 */ 1931 if (count) { 1932 index = ((char *)entry - (char *)&xh->xh_entries) / 1933 sizeof(struct ocfs2_xattr_entry); 1934 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1935 (count - index) * sizeof(struct ocfs2_xattr_entry)); 1936 memset(&xh->xh_entries[count], 0, 1937 sizeof(struct ocfs2_xattr_entry)); 1938 } 1939 } 1940 1941 /* 1942 * If we have a problem adjusting the size of an external value during 1943 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 1944 * in an intermediate state. For example, the value may be partially 1945 * truncated. 1946 * 1947 * If the value tree hasn't changed, the extend/truncate went nowhere. 1948 * We have nothing to do. The caller can treat it as a straight error. 1949 * 1950 * If the value tree got partially truncated, we now have a corrupted 1951 * extended attribute. We're going to wipe its entry and leak the 1952 * clusters. Better to leak some storage than leave a corrupt entry. 1953 * 1954 * If the value tree grew, it obviously didn't grow enough for the 1955 * new entry. We're not going to try and reclaim those clusters either. 1956 * If there was already an external value there (orig_clusters != 0), 1957 * the new clusters are attached safely and we can just leave the old 1958 * value in place. If there was no external value there, we remove 1959 * the entry. 1960 * 1961 * This way, the xattr block we store in the journal will be consistent. 1962 * If the size change broke because of the journal, no changes will hit 1963 * disk anyway. 1964 */ 1965 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 1966 const char *what, 1967 unsigned int orig_clusters) 1968 { 1969 unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 1970 char *nameval_buf = ocfs2_xa_offset_pointer(loc, 1971 le16_to_cpu(loc->xl_entry->xe_name_offset)); 1972 1973 if (new_clusters < orig_clusters) { 1974 mlog(ML_ERROR, 1975 "Partial truncate while %s xattr %.*s. 
Leaking " 1976 "%u clusters and removing the entry\n", 1977 what, loc->xl_entry->xe_name_len, nameval_buf, 1978 orig_clusters - new_clusters); 1979 ocfs2_xa_remove_entry(loc); 1980 } else if (!orig_clusters) { 1981 mlog(ML_ERROR, 1982 "Unable to allocate an external value for xattr " 1983 "%.*s safely. Leaking %u clusters and removing the " 1984 "entry\n", 1985 loc->xl_entry->xe_name_len, nameval_buf, 1986 new_clusters - orig_clusters); 1987 ocfs2_xa_remove_entry(loc); 1988 } else if (new_clusters > orig_clusters) 1989 mlog(ML_ERROR, 1990 "Unable to grow xattr %.*s safely. %u new clusters " 1991 "have been added, but the value will not be " 1992 "modified\n", 1993 loc->xl_entry->xe_name_len, nameval_buf, 1994 new_clusters - orig_clusters); 1995 } 1996 1997 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 1998 struct ocfs2_xattr_set_ctxt *ctxt) 1999 { 2000 int rc = 0; 2001 unsigned int orig_clusters; 2002 2003 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2004 orig_clusters = ocfs2_xa_value_clusters(loc); 2005 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2006 if (rc) { 2007 mlog_errno(rc); 2008 /* 2009 * Since this is remove, we can return 0 if 2010 * ocfs2_xa_cleanup_value_truncate() is going to 2011 * wipe the entry anyway. So we check the 2012 * cluster count as well. 2013 */ 2014 if (orig_clusters != ocfs2_xa_value_clusters(loc)) 2015 rc = 0; 2016 ocfs2_xa_cleanup_value_truncate(loc, "removing", 2017 orig_clusters); 2018 if (rc) 2019 goto out; 2020 } 2021 } 2022 2023 ocfs2_xa_remove_entry(loc); 2024 2025 out: 2026 return rc; 2027 } 2028 2029 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 2030 { 2031 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 2032 char *nameval_buf; 2033 2034 nameval_buf = ocfs2_xa_offset_pointer(loc, 2035 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2036 memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 2037 } 2038 2039 /* 2040 * Take an existing entry and make it ready for the new value. 
This 2041 * won't allocate space, but it may free space. It should be ready for 2042 * ocfs2_xa_prepare_entry() to finish the work. 2043 */ 2044 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc, 2045 struct ocfs2_xattr_info *xi, 2046 struct ocfs2_xattr_set_ctxt *ctxt) 2047 { 2048 int rc = 0; 2049 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2050 unsigned int orig_clusters; 2051 char *nameval_buf; 2052 int xe_local = ocfs2_xattr_is_local(loc->xl_entry); 2053 int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE; 2054 2055 BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) != 2056 name_size); 2057 2058 nameval_buf = ocfs2_xa_offset_pointer(loc, 2059 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2060 if (xe_local) { 2061 memset(nameval_buf + name_size, 0, 2062 namevalue_size_xe(loc->xl_entry) - name_size); 2063 if (!xi_local) 2064 ocfs2_xa_install_value_root(loc); 2065 } else { 2066 orig_clusters = ocfs2_xa_value_clusters(loc); 2067 if (xi_local) { 2068 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2069 if (rc < 0) 2070 mlog_errno(rc); 2071 else 2072 memset(nameval_buf + name_size, 0, 2073 namevalue_size_xe(loc->xl_entry) - 2074 name_size); 2075 } else if (le64_to_cpu(loc->xl_entry->xe_value_size) > 2076 xi->xi_value_len) { 2077 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, 2078 ctxt); 2079 if (rc < 0) 2080 mlog_errno(rc); 2081 } 2082 2083 if (rc) { 2084 ocfs2_xa_cleanup_value_truncate(loc, "reusing", 2085 orig_clusters); 2086 goto out; 2087 } 2088 } 2089 2090 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 2091 ocfs2_xattr_set_local(loc->xl_entry, xi_local); 2092 2093 out: 2094 return rc; 2095 } 2096 2097 /* 2098 * Prepares loc->xl_entry to receive the new xattr. This includes 2099 * properly setting up the name+value pair region. If loc->xl_entry 2100 * already exists, it will take care of modifying it appropriately. 2101 * 2102 * Note that this modifies the data. You did journal_access already, 2103 * right? 
2104 */ 2105 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, 2106 struct ocfs2_xattr_info *xi, 2107 u32 name_hash, 2108 struct ocfs2_xattr_set_ctxt *ctxt) 2109 { 2110 int rc = 0; 2111 unsigned int orig_clusters; 2112 __le64 orig_value_size = 0; 2113 2114 rc = ocfs2_xa_check_space(loc, xi); 2115 if (rc) 2116 goto out; 2117 2118 if (loc->xl_entry) { 2119 if (ocfs2_xa_can_reuse_entry(loc, xi)) { 2120 orig_value_size = loc->xl_entry->xe_value_size; 2121 rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); 2122 if (rc) 2123 goto out; 2124 goto alloc_value; 2125 } 2126 2127 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2128 orig_clusters = ocfs2_xa_value_clusters(loc); 2129 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2130 if (rc) { 2131 mlog_errno(rc); 2132 ocfs2_xa_cleanup_value_truncate(loc, 2133 "overwriting", 2134 orig_clusters); 2135 goto out; 2136 } 2137 } 2138 ocfs2_xa_wipe_namevalue(loc); 2139 } else 2140 ocfs2_xa_add_entry(loc, name_hash); 2141 2142 /* 2143 * If we get here, we have a blank entry. Fill it. We grow our 2144 * name+value pair back from the end. 2145 */ 2146 ocfs2_xa_add_namevalue(loc, xi); 2147 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 2148 ocfs2_xa_install_value_root(loc); 2149 2150 alloc_value: 2151 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2152 orig_clusters = ocfs2_xa_value_clusters(loc); 2153 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); 2154 if (rc < 0) { 2155 ctxt->set_abort = 1; 2156 ocfs2_xa_cleanup_value_truncate(loc, "growing", 2157 orig_clusters); 2158 /* 2159 * If we were growing an existing value, 2160 * ocfs2_xa_cleanup_value_truncate() won't remove 2161 * the entry. We need to restore the original value 2162 * size. 2163 */ 2164 if (loc->xl_entry) { 2165 BUG_ON(!orig_value_size); 2166 loc->xl_entry->xe_value_size = orig_value_size; 2167 } 2168 mlog_errno(rc); 2169 } 2170 } 2171 2172 out: 2173 return rc; 2174 } 2175 2176 /* 2177 * Store the value portion of the name+value pair. 
This will skip 2178 * values that are stored externally. Their tree roots were set up 2179 * by ocfs2_xa_prepare_entry(). 2180 */ 2181 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, 2182 struct ocfs2_xattr_info *xi, 2183 struct ocfs2_xattr_set_ctxt *ctxt) 2184 { 2185 int rc = 0; 2186 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 2187 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2188 char *nameval_buf; 2189 struct ocfs2_xattr_value_buf vb; 2190 2191 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 2192 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2193 ocfs2_xa_fill_value_buf(loc, &vb); 2194 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, 2195 ctxt->handle, &vb, 2196 xi->xi_value, 2197 xi->xi_value_len); 2198 } else 2199 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); 2200 2201 return rc; 2202 } 2203 2204 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, 2205 struct ocfs2_xattr_info *xi, 2206 struct ocfs2_xattr_set_ctxt *ctxt) 2207 { 2208 int ret; 2209 u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, 2210 xi->xi_name_len); 2211 2212 ret = ocfs2_xa_journal_access(ctxt->handle, loc, 2213 OCFS2_JOURNAL_ACCESS_WRITE); 2214 if (ret) { 2215 mlog_errno(ret); 2216 goto out; 2217 } 2218 2219 /* 2220 * From here on out, everything is going to modify the buffer a 2221 * little. Errors are going to leave the xattr header in a 2222 * sane state. Thus, even with errors we dirty the sucker. 
2223 */ 2224 2225 /* Don't worry, we are never called with !xi_value and !xl_entry */ 2226 if (!xi->xi_value) { 2227 ret = ocfs2_xa_remove(loc, ctxt); 2228 goto out_dirty; 2229 } 2230 2231 ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); 2232 if (ret) { 2233 if (ret != -ENOSPC) 2234 mlog_errno(ret); 2235 goto out_dirty; 2236 } 2237 2238 ret = ocfs2_xa_store_value(loc, xi, ctxt); 2239 if (ret) 2240 mlog_errno(ret); 2241 2242 out_dirty: 2243 ocfs2_xa_journal_dirty(ctxt->handle, loc); 2244 2245 out: 2246 return ret; 2247 } 2248 2249 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, 2250 struct inode *inode, 2251 struct buffer_head *bh, 2252 struct ocfs2_xattr_entry *entry) 2253 { 2254 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2255 2256 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); 2257 2258 loc->xl_inode = inode; 2259 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2260 loc->xl_storage = bh; 2261 loc->xl_entry = entry; 2262 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); 2263 loc->xl_header = 2264 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - 2265 loc->xl_size); 2266 } 2267 2268 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, 2269 struct inode *inode, 2270 struct buffer_head *bh, 2271 struct ocfs2_xattr_entry *entry) 2272 { 2273 struct ocfs2_xattr_block *xb = 2274 (struct ocfs2_xattr_block *)bh->b_data; 2275 2276 BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); 2277 2278 loc->xl_inode = inode; 2279 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2280 loc->xl_storage = bh; 2281 loc->xl_header = &(xb->xb_attrs.xb_header); 2282 loc->xl_entry = entry; 2283 loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, 2284 xb_attrs.xb_header); 2285 } 2286 2287 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, 2288 struct ocfs2_xattr_bucket *bucket, 2289 struct ocfs2_xattr_entry *entry) 2290 { 2291 loc->xl_inode = bucket->bu_inode; 2292 loc->xl_ops = 
&ocfs2_xa_bucket_loc_ops; 2293 loc->xl_storage = bucket; 2294 loc->xl_header = bucket_xh(bucket); 2295 loc->xl_entry = entry; 2296 loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; 2297 } 2298 2299 /* 2300 * In xattr remove, if it is stored outside and refcounted, we may have 2301 * the chance to split the refcount tree. So need the allocators. 2302 */ 2303 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, 2304 struct ocfs2_xattr_value_root *xv, 2305 struct ocfs2_caching_info *ref_ci, 2306 struct buffer_head *ref_root_bh, 2307 struct ocfs2_alloc_context **meta_ac, 2308 int *ref_credits) 2309 { 2310 int ret, meta_add = 0; 2311 u32 p_cluster, num_clusters; 2312 unsigned int ext_flags; 2313 2314 *ref_credits = 0; 2315 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 2316 &num_clusters, 2317 &xv->xr_list, 2318 &ext_flags); 2319 if (ret) { 2320 mlog_errno(ret); 2321 goto out; 2322 } 2323 2324 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 2325 goto out; 2326 2327 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, 2328 ref_root_bh, xv, 2329 &meta_add, ref_credits); 2330 if (ret) { 2331 mlog_errno(ret); 2332 goto out; 2333 } 2334 2335 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2336 meta_add, meta_ac); 2337 if (ret) 2338 mlog_errno(ret); 2339 2340 out: 2341 return ret; 2342 } 2343 2344 static int ocfs2_remove_value_outside(struct inode*inode, 2345 struct ocfs2_xattr_value_buf *vb, 2346 struct ocfs2_xattr_header *header, 2347 struct ocfs2_caching_info *ref_ci, 2348 struct buffer_head *ref_root_bh) 2349 { 2350 int ret = 0, i, ref_credits; 2351 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2352 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2353 void *val; 2354 2355 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 2356 2357 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 2358 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 2359 2360 if (ocfs2_xattr_is_local(entry)) 2361 continue; 2362 2363 val = (void *)header + 2364 
le16_to_cpu(entry->xe_name_offset); 2365 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2366 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2367 2368 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2369 ref_ci, ref_root_bh, 2370 &ctxt.meta_ac, 2371 &ref_credits); 2372 2373 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2374 ocfs2_remove_extent_credits(osb->sb)); 2375 if (IS_ERR(ctxt.handle)) { 2376 ret = PTR_ERR(ctxt.handle); 2377 mlog_errno(ret); 2378 break; 2379 } 2380 2381 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2382 if (ret < 0) { 2383 mlog_errno(ret); 2384 break; 2385 } 2386 2387 ocfs2_commit_trans(osb, ctxt.handle); 2388 if (ctxt.meta_ac) { 2389 ocfs2_free_alloc_context(ctxt.meta_ac); 2390 ctxt.meta_ac = NULL; 2391 } 2392 } 2393 2394 if (ctxt.meta_ac) 2395 ocfs2_free_alloc_context(ctxt.meta_ac); 2396 ocfs2_schedule_truncate_log_flush(osb, 1); 2397 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2398 return ret; 2399 } 2400 2401 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2402 struct buffer_head *di_bh, 2403 struct ocfs2_caching_info *ref_ci, 2404 struct buffer_head *ref_root_bh) 2405 { 2406 2407 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2408 struct ocfs2_xattr_header *header; 2409 int ret; 2410 struct ocfs2_xattr_value_buf vb = { 2411 .vb_bh = di_bh, 2412 .vb_access = ocfs2_journal_access_di, 2413 }; 2414 2415 header = (struct ocfs2_xattr_header *) 2416 ((void *)di + inode->i_sb->s_blocksize - 2417 le16_to_cpu(di->i_xattr_inline_size)); 2418 2419 ret = ocfs2_remove_value_outside(inode, &vb, header, 2420 ref_ci, ref_root_bh); 2421 2422 return ret; 2423 } 2424 2425 struct ocfs2_rm_xattr_bucket_para { 2426 struct ocfs2_caching_info *ref_ci; 2427 struct buffer_head *ref_root_bh; 2428 }; 2429 2430 static int ocfs2_xattr_block_remove(struct inode *inode, 2431 struct buffer_head *blk_bh, 2432 struct ocfs2_caching_info *ref_ci, 2433 struct buffer_head *ref_root_bh) 2434 { 2435 struct ocfs2_xattr_block *xb; 2436 
int ret = 0; 2437 struct ocfs2_xattr_value_buf vb = { 2438 .vb_bh = blk_bh, 2439 .vb_access = ocfs2_journal_access_xb, 2440 }; 2441 struct ocfs2_rm_xattr_bucket_para args = { 2442 .ref_ci = ref_ci, 2443 .ref_root_bh = ref_root_bh, 2444 }; 2445 2446 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2447 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2448 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 2449 ret = ocfs2_remove_value_outside(inode, &vb, header, 2450 ref_ci, ref_root_bh); 2451 } else 2452 ret = ocfs2_iterate_xattr_index_block(inode, 2453 blk_bh, 2454 ocfs2_rm_xattr_cluster, 2455 &args); 2456 2457 return ret; 2458 } 2459 2460 static int ocfs2_xattr_free_block(struct inode *inode, 2461 u64 block, 2462 struct ocfs2_caching_info *ref_ci, 2463 struct buffer_head *ref_root_bh) 2464 { 2465 struct inode *xb_alloc_inode; 2466 struct buffer_head *xb_alloc_bh = NULL; 2467 struct buffer_head *blk_bh = NULL; 2468 struct ocfs2_xattr_block *xb; 2469 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2470 handle_t *handle; 2471 int ret = 0; 2472 u64 blk, bg_blkno; 2473 u16 bit; 2474 2475 ret = ocfs2_read_xattr_block(inode, block, &blk_bh); 2476 if (ret < 0) { 2477 mlog_errno(ret); 2478 goto out; 2479 } 2480 2481 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); 2482 if (ret < 0) { 2483 mlog_errno(ret); 2484 goto out; 2485 } 2486 2487 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2488 blk = le64_to_cpu(xb->xb_blkno); 2489 bit = le16_to_cpu(xb->xb_suballoc_bit); 2490 if (xb->xb_suballoc_loc) 2491 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); 2492 else 2493 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2494 2495 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2496 EXTENT_ALLOC_SYSTEM_INODE, 2497 le16_to_cpu(xb->xb_suballoc_slot)); 2498 if (!xb_alloc_inode) { 2499 ret = -ENOMEM; 2500 mlog_errno(ret); 2501 goto out; 2502 } 2503 mutex_lock(&xb_alloc_inode->i_mutex); 2504 2505 ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 
1); 2506 if (ret < 0) { 2507 mlog_errno(ret); 2508 goto out_mutex; 2509 } 2510 2511 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 2512 if (IS_ERR(handle)) { 2513 ret = PTR_ERR(handle); 2514 mlog_errno(ret); 2515 goto out_unlock; 2516 } 2517 2518 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, 2519 bit, bg_blkno, 1); 2520 if (ret < 0) 2521 mlog_errno(ret); 2522 2523 ocfs2_commit_trans(osb, handle); 2524 out_unlock: 2525 ocfs2_inode_unlock(xb_alloc_inode, 1); 2526 brelse(xb_alloc_bh); 2527 out_mutex: 2528 mutex_unlock(&xb_alloc_inode->i_mutex); 2529 iput(xb_alloc_inode); 2530 out: 2531 brelse(blk_bh); 2532 return ret; 2533 } 2534 2535 /* 2536 * ocfs2_xattr_remove() 2537 * 2538 * Free extended attribute resources associated with this inode. 2539 */ 2540 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) 2541 { 2542 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2543 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2544 struct ocfs2_refcount_tree *ref_tree = NULL; 2545 struct buffer_head *ref_root_bh = NULL; 2546 struct ocfs2_caching_info *ref_ci = NULL; 2547 handle_t *handle; 2548 int ret; 2549 2550 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2551 return 0; 2552 2553 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2554 return 0; 2555 2556 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) { 2557 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), 2558 le64_to_cpu(di->i_refcount_loc), 2559 1, &ref_tree, &ref_root_bh); 2560 if (ret) { 2561 mlog_errno(ret); 2562 goto out; 2563 } 2564 ref_ci = &ref_tree->rf_ci; 2565 2566 } 2567 2568 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2569 ret = ocfs2_xattr_ibody_remove(inode, di_bh, 2570 ref_ci, ref_root_bh); 2571 if (ret < 0) { 2572 mlog_errno(ret); 2573 goto out; 2574 } 2575 } 2576 2577 if (di->i_xattr_loc) { 2578 ret = ocfs2_xattr_free_block(inode, 2579 le64_to_cpu(di->i_xattr_loc), 2580 ref_ci, ref_root_bh); 2581 if (ret < 0) { 2582 
mlog_errno(ret); 2583 goto out; 2584 } 2585 } 2586 2587 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 2588 OCFS2_INODE_UPDATE_CREDITS); 2589 if (IS_ERR(handle)) { 2590 ret = PTR_ERR(handle); 2591 mlog_errno(ret); 2592 goto out; 2593 } 2594 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 2595 OCFS2_JOURNAL_ACCESS_WRITE); 2596 if (ret) { 2597 mlog_errno(ret); 2598 goto out_commit; 2599 } 2600 2601 di->i_xattr_loc = 0; 2602 2603 spin_lock(&oi->ip_lock); 2604 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2605 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2606 spin_unlock(&oi->ip_lock); 2607 2608 ocfs2_journal_dirty(handle, di_bh); 2609 out_commit: 2610 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2611 out: 2612 if (ref_tree) 2613 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); 2614 brelse(ref_root_bh); 2615 return ret; 2616 } 2617 2618 static int ocfs2_xattr_has_space_inline(struct inode *inode, 2619 struct ocfs2_dinode *di) 2620 { 2621 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2622 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; 2623 int free; 2624 2625 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) 2626 return 0; 2627 2628 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2629 struct ocfs2_inline_data *idata = &di->id2.i_data; 2630 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); 2631 } else if (ocfs2_inode_is_fast_symlink(inode)) { 2632 free = ocfs2_fast_symlink_chars(inode->i_sb) - 2633 le64_to_cpu(di->i_size); 2634 } else { 2635 struct ocfs2_extent_list *el = &di->id2.i_list; 2636 free = (le16_to_cpu(el->l_count) - 2637 le16_to_cpu(el->l_next_free_rec)) * 2638 sizeof(struct ocfs2_extent_rec); 2639 } 2640 if (free >= xattrsize) 2641 return 1; 2642 2643 return 0; 2644 } 2645 2646 /* 2647 * ocfs2_xattr_ibody_find() 2648 * 2649 * Find extended attribute in inode block and 2650 * fill search info into struct ocfs2_xattr_search. 
2651 */ 2652 static int ocfs2_xattr_ibody_find(struct inode *inode, 2653 int name_index, 2654 const char *name, 2655 struct ocfs2_xattr_search *xs) 2656 { 2657 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2658 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2659 int ret; 2660 int has_space = 0; 2661 2662 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2663 return 0; 2664 2665 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2666 down_read(&oi->ip_alloc_sem); 2667 has_space = ocfs2_xattr_has_space_inline(inode, di); 2668 up_read(&oi->ip_alloc_sem); 2669 if (!has_space) 2670 return 0; 2671 } 2672 2673 xs->xattr_bh = xs->inode_bh; 2674 xs->end = (void *)di + inode->i_sb->s_blocksize; 2675 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) 2676 xs->header = (struct ocfs2_xattr_header *) 2677 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 2678 else 2679 xs->header = (struct ocfs2_xattr_header *) 2680 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); 2681 xs->base = (void *)xs->header; 2682 xs->here = xs->header->xh_entries; 2683 2684 /* Find the named attribute. 
*/ 2685 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2686 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2687 if (ret && ret != -ENODATA) 2688 return ret; 2689 xs->not_found = ret; 2690 } 2691 2692 return 0; 2693 } 2694 2695 static int ocfs2_xattr_ibody_init(struct inode *inode, 2696 struct buffer_head *di_bh, 2697 struct ocfs2_xattr_set_ctxt *ctxt) 2698 { 2699 int ret; 2700 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2701 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2702 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2703 unsigned int xattrsize = osb->s_xattr_inline_size; 2704 2705 if (!ocfs2_xattr_has_space_inline(inode, di)) { 2706 ret = -ENOSPC; 2707 goto out; 2708 } 2709 2710 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, 2711 OCFS2_JOURNAL_ACCESS_WRITE); 2712 if (ret) { 2713 mlog_errno(ret); 2714 goto out; 2715 } 2716 2717 /* 2718 * Adjust extent record count or inline data size 2719 * to reserve space for extended attribute. 2720 */ 2721 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2722 struct ocfs2_inline_data *idata = &di->id2.i_data; 2723 le16_add_cpu(&idata->id_count, -xattrsize); 2724 } else if (!(ocfs2_inode_is_fast_symlink(inode))) { 2725 struct ocfs2_extent_list *el = &di->id2.i_list; 2726 le16_add_cpu(&el->l_count, -(xattrsize / 2727 sizeof(struct ocfs2_extent_rec))); 2728 } 2729 di->i_xattr_inline_size = cpu_to_le16(xattrsize); 2730 2731 spin_lock(&oi->ip_lock); 2732 oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; 2733 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2734 spin_unlock(&oi->ip_lock); 2735 2736 ocfs2_journal_dirty(ctxt->handle, di_bh); 2737 2738 out: 2739 return ret; 2740 } 2741 2742 /* 2743 * ocfs2_xattr_ibody_set() 2744 * 2745 * Set, replace or remove an extended attribute into inode block. 
2746 * 2747 */ 2748 static int ocfs2_xattr_ibody_set(struct inode *inode, 2749 struct ocfs2_xattr_info *xi, 2750 struct ocfs2_xattr_search *xs, 2751 struct ocfs2_xattr_set_ctxt *ctxt) 2752 { 2753 int ret; 2754 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2755 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2756 struct ocfs2_xa_loc loc; 2757 2758 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2759 return -ENOSPC; 2760 2761 down_write(&oi->ip_alloc_sem); 2762 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2763 if (!ocfs2_xattr_has_space_inline(inode, di)) { 2764 ret = -ENOSPC; 2765 goto out; 2766 } 2767 } 2768 2769 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2770 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2771 if (ret) { 2772 if (ret != -ENOSPC) 2773 mlog_errno(ret); 2774 goto out; 2775 } 2776 } 2777 2778 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2779 xs->not_found ? NULL : xs->here); 2780 ret = ocfs2_xa_set(&loc, xi, ctxt); 2781 if (ret) { 2782 if (ret != -ENOSPC) 2783 mlog_errno(ret); 2784 goto out; 2785 } 2786 xs->here = loc.xl_entry; 2787 2788 out: 2789 up_write(&oi->ip_alloc_sem); 2790 2791 return ret; 2792 } 2793 2794 /* 2795 * ocfs2_xattr_block_find() 2796 * 2797 * Find extended attribute in external block and 2798 * fill search info into struct ocfs2_xattr_search. 
2799 */ 2800 static int ocfs2_xattr_block_find(struct inode *inode, 2801 int name_index, 2802 const char *name, 2803 struct ocfs2_xattr_search *xs) 2804 { 2805 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2806 struct buffer_head *blk_bh = NULL; 2807 struct ocfs2_xattr_block *xb; 2808 int ret = 0; 2809 2810 if (!di->i_xattr_loc) 2811 return ret; 2812 2813 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 2814 &blk_bh); 2815 if (ret < 0) { 2816 mlog_errno(ret); 2817 return ret; 2818 } 2819 2820 xs->xattr_bh = blk_bh; 2821 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2822 2823 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2824 xs->header = &xb->xb_attrs.xb_header; 2825 xs->base = (void *)xs->header; 2826 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; 2827 xs->here = xs->header->xh_entries; 2828 2829 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2830 } else 2831 ret = ocfs2_xattr_index_block_find(inode, blk_bh, 2832 name_index, 2833 name, xs); 2834 2835 if (ret && ret != -ENODATA) { 2836 xs->xattr_bh = NULL; 2837 goto cleanup; 2838 } 2839 xs->not_found = ret; 2840 return 0; 2841 cleanup: 2842 brelse(blk_bh); 2843 2844 return ret; 2845 } 2846 2847 static int ocfs2_create_xattr_block(struct inode *inode, 2848 struct buffer_head *inode_bh, 2849 struct ocfs2_xattr_set_ctxt *ctxt, 2850 int indexed, 2851 struct buffer_head **ret_bh) 2852 { 2853 int ret; 2854 u16 suballoc_bit_start; 2855 u32 num_got; 2856 u64 suballoc_loc, first_blkno; 2857 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; 2858 struct buffer_head *new_bh = NULL; 2859 struct ocfs2_xattr_block *xblk; 2860 2861 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 2862 inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); 2863 if (ret < 0) { 2864 mlog_errno(ret); 2865 goto end; 2866 } 2867 2868 ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1, 2869 &suballoc_loc, &suballoc_bit_start, 2870 &num_got, &first_blkno); 2871 if 
(ret < 0) { 2872 mlog_errno(ret); 2873 goto end; 2874 } 2875 2876 new_bh = sb_getblk(inode->i_sb, first_blkno); 2877 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); 2878 2879 ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), 2880 new_bh, 2881 OCFS2_JOURNAL_ACCESS_CREATE); 2882 if (ret < 0) { 2883 mlog_errno(ret); 2884 goto end; 2885 } 2886 2887 /* Initialize ocfs2_xattr_block */ 2888 xblk = (struct ocfs2_xattr_block *)new_bh->b_data; 2889 memset(xblk, 0, inode->i_sb->s_blocksize); 2890 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2891 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2892 xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc); 2893 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2894 xblk->xb_fs_generation = 2895 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation); 2896 xblk->xb_blkno = cpu_to_le64(first_blkno); 2897 if (indexed) { 2898 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2899 xr->xt_clusters = cpu_to_le32(1); 2900 xr->xt_last_eb_blk = 0; 2901 xr->xt_list.l_tree_depth = 0; 2902 xr->xt_list.l_count = cpu_to_le16( 2903 ocfs2_xattr_recs_per_xb(inode->i_sb)); 2904 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 2905 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); 2906 } 2907 ocfs2_journal_dirty(ctxt->handle, new_bh); 2908 2909 /* Add it to the inode */ 2910 di->i_xattr_loc = cpu_to_le64(first_blkno); 2911 2912 spin_lock(&OCFS2_I(inode)->ip_lock); 2913 OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 2914 di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); 2915 spin_unlock(&OCFS2_I(inode)->ip_lock); 2916 2917 ocfs2_journal_dirty(ctxt->handle, inode_bh); 2918 2919 *ret_bh = new_bh; 2920 new_bh = NULL; 2921 2922 end: 2923 brelse(new_bh); 2924 return ret; 2925 } 2926 2927 /* 2928 * ocfs2_xattr_block_set() 2929 * 2930 * Set, replace or remove an extended attribute into external block. 
2931 * 2932 */ 2933 static int ocfs2_xattr_block_set(struct inode *inode, 2934 struct ocfs2_xattr_info *xi, 2935 struct ocfs2_xattr_search *xs, 2936 struct ocfs2_xattr_set_ctxt *ctxt) 2937 { 2938 struct buffer_head *new_bh = NULL; 2939 struct ocfs2_xattr_block *xblk = NULL; 2940 int ret; 2941 struct ocfs2_xa_loc loc; 2942 2943 if (!xs->xattr_bh) { 2944 ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt, 2945 0, &new_bh); 2946 if (ret) { 2947 mlog_errno(ret); 2948 goto end; 2949 } 2950 2951 xs->xattr_bh = new_bh; 2952 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2953 xs->header = &xblk->xb_attrs.xb_header; 2954 xs->base = (void *)xs->header; 2955 xs->end = (void *)xblk + inode->i_sb->s_blocksize; 2956 xs->here = xs->header->xh_entries; 2957 } else 2958 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2959 2960 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { 2961 ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh, 2962 xs->not_found ? NULL : xs->here); 2963 2964 ret = ocfs2_xa_set(&loc, xi, ctxt); 2965 if (!ret) 2966 xs->here = loc.xl_entry; 2967 else if ((ret != -ENOSPC) || ctxt->set_abort) 2968 goto end; 2969 else { 2970 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); 2971 if (ret) 2972 goto end; 2973 } 2974 } 2975 2976 if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED) 2977 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt); 2978 2979 end: 2980 return ret; 2981 } 2982 2983 /* Check whether the new xattr can be inserted into the inode. 
*/
static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
				       struct ocfs2_xattr_info *xi,
				       struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_entry *last;
	int free, i;
	size_t min_offs = xs->end - xs->base;

	if (!xs->header)
		return 0;

	last = xs->header->xh_entries;

	/*
	 * Walk every entry to find the lowest name offset in use; 'last'
	 * ends up just past the final entry.
	 */
	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
		size_t offs = le16_to_cpu(last->xe_name_offset);
		if (offs < min_offs)
			min_offs = offs;
		last += 1;
	}

	/* Free bytes between the entry array and the lowest name+value. */
	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
	if (free < 0)
		return 0;

	/* Callers only ask this for an xattr not already in the inode. */
	BUG_ON(!xs->not_found);

	if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
		return 1;

	return 0;
}

/*
 * Estimate what setting xi will cost: data clusters, metadata blocks
 * and journal credits.  xis and xbs are the search results for the
 * inode body and the external block.  Each of the three output
 * pointers may be NULL, in which case that figure is not reported.
 * Returns 0 or a negative errno.
 */
static int ocfs2_calc_xattr_set_need(struct inode *inode,
				     struct ocfs2_dinode *di,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xis,
				     struct ocfs2_xattr_search *xbs,
				     int *clusters_need,
				     int *meta_need,
				     int *credits_need)
{
	int ret = 0, old_in_xb = 0;
	int clusters_add = 0, meta_add = 0, credits = 0;
	struct buffer_head *bh = NULL;
	struct ocfs2_xattr_block *xb = NULL;
	struct ocfs2_xattr_entry *xe = NULL;
	struct ocfs2_xattr_value_root *xv = NULL;
	char *base = NULL;
	int name_offset, name_len = 0;
	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
						    xi->xi_value_len);
	u64 value_size;

	/*
	 * Calculate the clusters we need to write.
	 * No matter whether we replace an old one or add a new one,
	 * we need this for writing.
	 */
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		credits += new_clusters *
			   ocfs2_clusters_to_blocks(inode->i_sb, 1);

	/* Brand new xattr: found neither in the inode nor in the block. */
	if (xis->not_found && xbs->not_found) {
		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);

		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
			clusters_add += new_clusters;
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							&def_xv.xv.xr_list,
							new_clusters);
		}

		goto meta_guess;
	}

	/* Locate the existing entry and the base its offsets refer to. */
	if (!xis->not_found) {
		xe = xis->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		base = xis->base;
		credits += OCFS2_INODE_UPDATE_CREDITS;
	} else {
		int i, block_off = 0;
		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
		xe = xbs->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		i = xbs->here - xbs->header->xh_entries;
		old_in_xb = 1;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			/*
			 * NOTE(review): ret is not checked here; a failure
			 * is only reported when the function returns.
			 */
			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
							bucket_xh(xbs->bucket),
							i, &block_off,
							&name_offset);
			base = bucket_block(xbs->bucket, block_off);
			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		} else {
			base = xbs->base;
			credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
		}
	}

	/*
	 * delete a xattr doesn't need metadata and cluster allocation.
	 * so just calculate the credits and return.
	 *
	 * The credits for removing the value tree will be extended
	 * by ocfs2_remove_extent itself.
	 */
	if (!xi->xi_value) {
		if (!ocfs2_xattr_is_local(xe))
			credits += ocfs2_remove_extent_credits(inode->i_sb);

		goto out;
	}

	/* do cluster allocation guess first. */
	value_size = le64_to_cpu(xe->xe_value_size);

	if (old_in_xb) {
		/*
		 * In xattr set, we always try to set the xe in inode first,
		 * so if it can be inserted into inode successfully, the old
		 * one will be removed from the xattr block, and this xattr
		 * will be inserted into inode as a new xattr in inode.
		 */
		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
			clusters_add += new_clusters;
			credits += ocfs2_remove_extent_credits(inode->i_sb) +
				    OCFS2_INODE_UPDATE_CREDITS;
			if (!ocfs2_xattr_is_local(xe))
				credits += ocfs2_calc_extend_credits(
							inode->i_sb,
							&def_xv.xv.xr_list,
							new_clusters);
			goto out;
		}
	}

	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		/* the new values will be stored outside. */
		u32 old_clusters = 0;

		if (!ocfs2_xattr_is_local(xe)) {
			old_clusters =	ocfs2_clusters_for_bytes(inode->i_sb,
								 value_size);
			xv = (struct ocfs2_xattr_value_root *)
			     (base + name_offset + name_len);
			value_size = OCFS2_XATTR_ROOT_SIZE;
		} else
			xv = &def_xv.xv;

		if (old_clusters >= new_clusters) {
			/* Shrinking or same size: only removal credits. */
			credits += ocfs2_remove_extent_credits(inode->i_sb);
			goto out;
		} else {
			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
			clusters_add += new_clusters - old_clusters;
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     &xv->xr_list,
							     new_clusters -
							     old_clusters);
			if (value_size >= OCFS2_XATTR_ROOT_SIZE)
				goto out;
		}
	} else {
		/*
		 * Now the new value will be stored inside. So if the new
		 * value is smaller than the size of value root or the old
		 * value, we don't need any allocation, otherwise we have
		 * to guess metadata allocation.
		 */
		if ((ocfs2_xattr_is_local(xe) &&
		     (value_size >= xi->xi_value_len)) ||
		    (!ocfs2_xattr_is_local(xe) &&
		     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
			goto out;
	}

meta_guess:
	/* calculate metadata allocation. */
	if (di->i_xattr_loc) {
		if (!xbs->xattr_bh) {
			/* Read the block into bh; it is released at 'out'. */
			ret = ocfs2_read_xattr_block(inode,
						     le64_to_cpu(di->i_xattr_loc),
						     &bh);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			xb = (struct ocfs2_xattr_block *)bh->b_data;
		} else
			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;

		/*
		 * If there is already an xattr tree, good, we can calculate
		 * like other b-trees. Otherwise we may have the chance of
		 * create a tree, the credit calculation is borrowed from
		 * ocfs2_calc_extend_credits with root_el = NULL. And the
		 * new tree will be cluster based, so no meta is needed.
		 */
		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			struct ocfs2_extent_list *el =
				 &xb->xb_attrs.xb_root.xt_list;
			meta_add += ocfs2_extend_meta_needed(el);
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     el, 1);
		} else
			credits += OCFS2_SUBALLOC_ALLOC + 1;

		/*
		 * This cluster will be used either for new bucket or for
		 * new xattr block.
		 * If the cluster size is the same as the bucket size, one
		 * more is needed since we may need to extend the bucket
		 * also.
		 */
		clusters_add += 1;
		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		if (OCFS2_XATTR_BUCKET_SIZE ==
			OCFS2_SB(inode->i_sb)->s_clustersize) {
			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
			clusters_add += 1;
		}
	} else {
		/* No external block yet: one will have to be created. */
		meta_add += 1;
		credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
	}
out:
	if (clusters_need)
		*clusters_need = clusters_add;
	if (meta_need)
		*meta_need = meta_add;
	if (credits_need)
		*credits_need = credits;
	brelse(bh);
	return ret;
}

/*
 * Zero *ctxt, work out what setting xi needs and reserve the metadata
 * blocks (plus extra_meta) and data clusters for it.  The journal
 * credits required are returned through *credits.  On failure any
 * metadata reservation made here is released again.
 */
static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
				     struct ocfs2_dinode *di,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xis,
				     struct ocfs2_xattr_search *xbs,
				     struct ocfs2_xattr_set_ctxt *ctxt,
				     int extra_meta,
				     int *credits)
{
	int clusters_add, meta_add, ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));

	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);

	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
					&clusters_add, &meta_add, credits);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	meta_add += extra_meta;
	mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
	     "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits);

	if (meta_add) {
		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
							&ctxt->meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	if (clusters_add) {
		ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
		if (ret)
			mlog_errno(ret);
	}
out:
	if (ret) {
		if (ctxt->meta_ac) {
			ocfs2_free_alloc_context(ctxt->meta_ac);
			ctxt->meta_ac = NULL;
		}

		/*
		 * We cannot have an error and a non null ctxt->data_ac.
		 */
	}

	return ret;
}

/*
 * Perform the set/replace/remove described by xi inside the
 * transaction in ctxt, choosing between the inode body and the
 * external block, and update the inode ctime on success.
 *
 * Note that xi is modified (value cleared) when an old copy has to be
 * removed from the other location after a successful set.
 */
static int __ocfs2_xattr_set_handle(struct inode *inode,
				    struct ocfs2_dinode *di,
				    struct ocfs2_xattr_info *xi,
				    struct ocfs2_xattr_search *xis,
				    struct ocfs2_xattr_search *xbs,
				    struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret = 0, credits, old_found;

	if (!xi->xi_value) {
		/* Remove existing extended attribute */
		if (!xis->not_found)
			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		else if (!xbs->not_found)
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
	} else {
		/* We always try to set extended attribute into inode first */
		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		if (!ret && !xbs->not_found) {
			/*
			 * If succeed and that extended attribute existing in
			 * external block, then we will remove it.
			 */
			xi->xi_value = NULL;
			xi->xi_value_len = 0;

			/*
			 * Pretend the inode copy is absent while computing
			 * the credits for removing the block copy.
			 */
			old_found = xis->not_found;
			xis->not_found = -ENODATA;
			ret = ocfs2_calc_xattr_set_need(inode,
							di,
							xi,
							xis,
							xbs,
							NULL,
							NULL,
							&credits);
			xis->not_found = old_found;
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_extend_trans(ctxt->handle, credits);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
		} else if ((ret == -ENOSPC) && !ctxt->set_abort) {
			/* The block exists but has not been searched yet. */
			if (di->i_xattr_loc && !xbs->xattr_bh) {
				ret = ocfs2_xattr_block_find(inode,
							     xi->xi_name_index,
							     xi->xi_name, xbs);
				if (ret)
					goto out;

				old_found = xis->not_found;
				xis->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				xis->not_found = old_found;
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
			}
			/*
			 * If no space in inode, we will set extended attribute
			 * into external block.
			 */
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
			if (ret)
				goto out;
			if (!xis->not_found) {
				/*
				 * If succeed and that extended attribute
				 * existing in inode, we will remove it.
				 */
				xi->xi_value = NULL;
				xi->xi_value_len = 0;
				xbs->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
				ret = ocfs2_xattr_ibody_set(inode, xi,
							    xis, ctxt);
			}
		}
	}

	if (!ret) {
		/* Update inode ctime. */
		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
					      xis->inode_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		inode->i_ctime = CURRENT_TIME;
		di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
		di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
	}
out:
	return ret;
}

/*
 * This function is only called during inode creation
 * to init security/acl xattrs of the new inode.
 * All transaction credits have been reserved in mknod.
3421 */ 3422 int ocfs2_xattr_set_handle(handle_t *handle, 3423 struct inode *inode, 3424 struct buffer_head *di_bh, 3425 int name_index, 3426 const char *name, 3427 const void *value, 3428 size_t value_len, 3429 int flags, 3430 struct ocfs2_alloc_context *meta_ac, 3431 struct ocfs2_alloc_context *data_ac) 3432 { 3433 struct ocfs2_dinode *di; 3434 int ret; 3435 3436 struct ocfs2_xattr_info xi = { 3437 .xi_name_index = name_index, 3438 .xi_name = name, 3439 .xi_name_len = strlen(name), 3440 .xi_value = value, 3441 .xi_value_len = value_len, 3442 }; 3443 3444 struct ocfs2_xattr_search xis = { 3445 .not_found = -ENODATA, 3446 }; 3447 3448 struct ocfs2_xattr_search xbs = { 3449 .not_found = -ENODATA, 3450 }; 3451 3452 struct ocfs2_xattr_set_ctxt ctxt = { 3453 .handle = handle, 3454 .meta_ac = meta_ac, 3455 .data_ac = data_ac, 3456 }; 3457 3458 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3459 return -EOPNOTSUPP; 3460 3461 /* 3462 * In extreme situation, may need xattr bucket when 3463 * block size is too small. And we have already reserved 3464 * the credits for bucket in mknod. 
3465 */ 3466 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) { 3467 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3468 if (!xbs.bucket) { 3469 mlog_errno(-ENOMEM); 3470 return -ENOMEM; 3471 } 3472 } 3473 3474 xis.inode_bh = xbs.inode_bh = di_bh; 3475 di = (struct ocfs2_dinode *)di_bh->b_data; 3476 3477 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3478 3479 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3480 if (ret) 3481 goto cleanup; 3482 if (xis.not_found) { 3483 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3484 if (ret) 3485 goto cleanup; 3486 } 3487 3488 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3489 3490 cleanup: 3491 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3492 brelse(xbs.xattr_bh); 3493 ocfs2_xattr_bucket_free(xbs.bucket); 3494 3495 return ret; 3496 } 3497 3498 /* 3499 * ocfs2_xattr_set() 3500 * 3501 * Set, replace or remove an extended attribute for this inode. 3502 * value is NULL to remove an existing extended attribute, else either 3503 * create or replace an extended attribute. 
3504 */ 3505 int ocfs2_xattr_set(struct inode *inode, 3506 int name_index, 3507 const char *name, 3508 const void *value, 3509 size_t value_len, 3510 int flags) 3511 { 3512 struct buffer_head *di_bh = NULL; 3513 struct ocfs2_dinode *di; 3514 int ret, credits, ref_meta = 0, ref_credits = 0; 3515 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3516 struct inode *tl_inode = osb->osb_tl_inode; 3517 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 3518 struct ocfs2_refcount_tree *ref_tree = NULL; 3519 3520 struct ocfs2_xattr_info xi = { 3521 .xi_name_index = name_index, 3522 .xi_name = name, 3523 .xi_name_len = strlen(name), 3524 .xi_value = value, 3525 .xi_value_len = value_len, 3526 }; 3527 3528 struct ocfs2_xattr_search xis = { 3529 .not_found = -ENODATA, 3530 }; 3531 3532 struct ocfs2_xattr_search xbs = { 3533 .not_found = -ENODATA, 3534 }; 3535 3536 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3537 return -EOPNOTSUPP; 3538 3539 /* 3540 * Only xbs will be used on indexed trees. xis doesn't need a 3541 * bucket. 3542 */ 3543 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3544 if (!xbs.bucket) { 3545 mlog_errno(-ENOMEM); 3546 return -ENOMEM; 3547 } 3548 3549 ret = ocfs2_inode_lock(inode, &di_bh, 1); 3550 if (ret < 0) { 3551 mlog_errno(ret); 3552 goto cleanup_nolock; 3553 } 3554 xis.inode_bh = xbs.inode_bh = di_bh; 3555 di = (struct ocfs2_dinode *)di_bh->b_data; 3556 3557 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3558 /* 3559 * Scan inode and external block to find the same name 3560 * extended attribute and collect search infomation. 
3561 */ 3562 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3563 if (ret) 3564 goto cleanup; 3565 if (xis.not_found) { 3566 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3567 if (ret) 3568 goto cleanup; 3569 } 3570 3571 if (xis.not_found && xbs.not_found) { 3572 ret = -ENODATA; 3573 if (flags & XATTR_REPLACE) 3574 goto cleanup; 3575 ret = 0; 3576 if (!value) 3577 goto cleanup; 3578 } else { 3579 ret = -EEXIST; 3580 if (flags & XATTR_CREATE) 3581 goto cleanup; 3582 } 3583 3584 /* Check whether the value is refcounted and do some prepartion. */ 3585 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL && 3586 (!xis.not_found || !xbs.not_found)) { 3587 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3588 &xis, &xbs, &ref_tree, 3589 &ref_meta, &ref_credits); 3590 if (ret) { 3591 mlog_errno(ret); 3592 goto cleanup; 3593 } 3594 } 3595 3596 mutex_lock(&tl_inode->i_mutex); 3597 3598 if (ocfs2_truncate_log_needs_flush(osb)) { 3599 ret = __ocfs2_flush_truncate_log(osb); 3600 if (ret < 0) { 3601 mutex_unlock(&tl_inode->i_mutex); 3602 mlog_errno(ret); 3603 goto cleanup; 3604 } 3605 } 3606 mutex_unlock(&tl_inode->i_mutex); 3607 3608 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3609 &xbs, &ctxt, ref_meta, &credits); 3610 if (ret) { 3611 mlog_errno(ret); 3612 goto cleanup; 3613 } 3614 3615 /* we need to update inode's ctime field, so add credit for it. 
*/ 3616 credits += OCFS2_INODE_UPDATE_CREDITS; 3617 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 3618 if (IS_ERR(ctxt.handle)) { 3619 ret = PTR_ERR(ctxt.handle); 3620 mlog_errno(ret); 3621 goto cleanup; 3622 } 3623 3624 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3625 3626 ocfs2_commit_trans(osb, ctxt.handle); 3627 3628 if (ctxt.data_ac) 3629 ocfs2_free_alloc_context(ctxt.data_ac); 3630 if (ctxt.meta_ac) 3631 ocfs2_free_alloc_context(ctxt.meta_ac); 3632 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3633 ocfs2_schedule_truncate_log_flush(osb, 1); 3634 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3635 3636 cleanup: 3637 if (ref_tree) 3638 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3639 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3640 if (!value && !ret) { 3641 ret = ocfs2_try_remove_refcount_tree(inode, di_bh); 3642 if (ret) 3643 mlog_errno(ret); 3644 } 3645 ocfs2_inode_unlock(inode, 1); 3646 cleanup_nolock: 3647 brelse(di_bh); 3648 brelse(xbs.xattr_bh); 3649 ocfs2_xattr_bucket_free(xbs.bucket); 3650 3651 return ret; 3652 } 3653 3654 /* 3655 * Find the xattr extent rec which may contains name_hash. 3656 * e_cpos will be the first name hash of the xattr rec. 3657 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. 
3658 */ 3659 static int ocfs2_xattr_get_rec(struct inode *inode, 3660 u32 name_hash, 3661 u64 *p_blkno, 3662 u32 *e_cpos, 3663 u32 *num_clusters, 3664 struct ocfs2_extent_list *el) 3665 { 3666 int ret = 0, i; 3667 struct buffer_head *eb_bh = NULL; 3668 struct ocfs2_extent_block *eb; 3669 struct ocfs2_extent_rec *rec = NULL; 3670 u64 e_blkno = 0; 3671 3672 if (el->l_tree_depth) { 3673 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3674 &eb_bh); 3675 if (ret) { 3676 mlog_errno(ret); 3677 goto out; 3678 } 3679 3680 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3681 el = &eb->h_list; 3682 3683 if (el->l_tree_depth) { 3684 ocfs2_error(inode->i_sb, 3685 "Inode %lu has non zero tree depth in " 3686 "xattr tree block %llu\n", inode->i_ino, 3687 (unsigned long long)eb_bh->b_blocknr); 3688 ret = -EROFS; 3689 goto out; 3690 } 3691 } 3692 3693 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3694 rec = &el->l_recs[i]; 3695 3696 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3697 e_blkno = le64_to_cpu(rec->e_blkno); 3698 break; 3699 } 3700 } 3701 3702 if (!e_blkno) { 3703 ocfs2_error(inode->i_sb, "Inode %lu has bad extent " 3704 "record (%u, %u, 0) in xattr", inode->i_ino, 3705 le32_to_cpu(rec->e_cpos), 3706 ocfs2_rec_clusters(el, rec)); 3707 ret = -EROFS; 3708 goto out; 3709 } 3710 3711 *p_blkno = le64_to_cpu(rec->e_blkno); 3712 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3713 if (e_cpos) 3714 *e_cpos = le32_to_cpu(rec->e_cpos); 3715 out: 3716 brelse(eb_bh); 3717 return ret; 3718 } 3719 3720 typedef int (xattr_bucket_func)(struct inode *inode, 3721 struct ocfs2_xattr_bucket *bucket, 3722 void *para); 3723 3724 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3725 struct ocfs2_xattr_bucket *bucket, 3726 int name_index, 3727 const char *name, 3728 u32 name_hash, 3729 u16 *xe_index, 3730 int *found) 3731 { 3732 int i, ret = 0, cmp = 1, block_off, new_offset; 3733 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3734 size_t name_len = 
strlen(name); 3735 struct ocfs2_xattr_entry *xe = NULL; 3736 char *xe_name; 3737 3738 /* 3739 * We don't use binary search in the bucket because there 3740 * may be multiple entries with the same name hash. 3741 */ 3742 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3743 xe = &xh->xh_entries[i]; 3744 3745 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3746 continue; 3747 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3748 break; 3749 3750 cmp = name_index - ocfs2_xattr_get_type(xe); 3751 if (!cmp) 3752 cmp = name_len - xe->xe_name_len; 3753 if (cmp) 3754 continue; 3755 3756 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3757 xh, 3758 i, 3759 &block_off, 3760 &new_offset); 3761 if (ret) { 3762 mlog_errno(ret); 3763 break; 3764 } 3765 3766 3767 xe_name = bucket_block(bucket, block_off) + new_offset; 3768 if (!memcmp(name, xe_name, name_len)) { 3769 *xe_index = i; 3770 *found = 1; 3771 ret = 0; 3772 break; 3773 } 3774 } 3775 3776 return ret; 3777 } 3778 3779 /* 3780 * Find the specified xattr entry in a series of buckets. 3781 * This series start from p_blkno and last for num_clusters. 3782 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3783 * the num of the valid buckets. 3784 * 3785 * Return the buffer_head this xattr should reside in. And if the xattr's 3786 * hash is in the gap of 2 buckets, return the lower bucket. 
3787 */ 3788 static int ocfs2_xattr_bucket_find(struct inode *inode, 3789 int name_index, 3790 const char *name, 3791 u32 name_hash, 3792 u64 p_blkno, 3793 u32 first_hash, 3794 u32 num_clusters, 3795 struct ocfs2_xattr_search *xs) 3796 { 3797 int ret, found = 0; 3798 struct ocfs2_xattr_header *xh = NULL; 3799 struct ocfs2_xattr_entry *xe = NULL; 3800 u16 index = 0; 3801 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3802 int low_bucket = 0, bucket, high_bucket; 3803 struct ocfs2_xattr_bucket *search; 3804 u32 last_hash; 3805 u64 blkno, lower_blkno = 0; 3806 3807 search = ocfs2_xattr_bucket_new(inode); 3808 if (!search) { 3809 ret = -ENOMEM; 3810 mlog_errno(ret); 3811 goto out; 3812 } 3813 3814 ret = ocfs2_read_xattr_bucket(search, p_blkno); 3815 if (ret) { 3816 mlog_errno(ret); 3817 goto out; 3818 } 3819 3820 xh = bucket_xh(search); 3821 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 3822 while (low_bucket <= high_bucket) { 3823 ocfs2_xattr_bucket_relse(search); 3824 3825 bucket = (low_bucket + high_bucket) / 2; 3826 blkno = p_blkno + bucket * blk_per_bucket; 3827 ret = ocfs2_read_xattr_bucket(search, blkno); 3828 if (ret) { 3829 mlog_errno(ret); 3830 goto out; 3831 } 3832 3833 xh = bucket_xh(search); 3834 xe = &xh->xh_entries[0]; 3835 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3836 high_bucket = bucket - 1; 3837 continue; 3838 } 3839 3840 /* 3841 * Check whether the hash of the last entry in our 3842 * bucket is larger than the search one. for an empty 3843 * bucket, the last one is also the first one. 3844 */ 3845 if (xh->xh_count) 3846 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; 3847 3848 last_hash = le32_to_cpu(xe->xe_name_hash); 3849 3850 /* record lower_blkno which may be the insert place. */ 3851 lower_blkno = blkno; 3852 3853 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3854 low_bucket = bucket + 1; 3855 continue; 3856 } 3857 3858 /* the searched xattr should reside in this bucket if exists. 
*/ 3859 ret = ocfs2_find_xe_in_bucket(inode, search, 3860 name_index, name, name_hash, 3861 &index, &found); 3862 if (ret) { 3863 mlog_errno(ret); 3864 goto out; 3865 } 3866 break; 3867 } 3868 3869 /* 3870 * Record the bucket we have found. 3871 * When the xattr's hash value is in the gap of 2 buckets, we will 3872 * always set it to the previous bucket. 3873 */ 3874 if (!lower_blkno) 3875 lower_blkno = p_blkno; 3876 3877 /* This should be in cache - we just read it during the search */ 3878 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); 3879 if (ret) { 3880 mlog_errno(ret); 3881 goto out; 3882 } 3883 3884 xs->header = bucket_xh(xs->bucket); 3885 xs->base = bucket_block(xs->bucket, 0); 3886 xs->end = xs->base + inode->i_sb->s_blocksize; 3887 3888 if (found) { 3889 xs->here = &xs->header->xh_entries[index]; 3890 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name, 3891 (unsigned long long)bucket_blkno(xs->bucket), index); 3892 } else 3893 ret = -ENODATA; 3894 3895 out: 3896 ocfs2_xattr_bucket_free(search); 3897 return ret; 3898 } 3899 3900 static int ocfs2_xattr_index_block_find(struct inode *inode, 3901 struct buffer_head *root_bh, 3902 int name_index, 3903 const char *name, 3904 struct ocfs2_xattr_search *xs) 3905 { 3906 int ret; 3907 struct ocfs2_xattr_block *xb = 3908 (struct ocfs2_xattr_block *)root_bh->b_data; 3909 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 3910 struct ocfs2_extent_list *el = &xb_root->xt_list; 3911 u64 p_blkno = 0; 3912 u32 first_hash, num_clusters = 0; 3913 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 3914 3915 if (le16_to_cpu(el->l_next_free_rec) == 0) 3916 return -ENODATA; 3917 3918 mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n", 3919 name, name_hash, name_index); 3920 3921 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3922 &num_clusters, el); 3923 if (ret) { 3924 mlog_errno(ret); 3925 goto out; 3926 } 3927 3928 BUG_ON(p_blkno == 0 || num_clusters 
== 0 || first_hash > name_hash); 3929 3930 mlog(0, "find xattr extent rec %u clusters from %llu, the first hash " 3931 "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno, 3932 first_hash); 3933 3934 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3935 p_blkno, first_hash, num_clusters, xs); 3936 3937 out: 3938 return ret; 3939 } 3940 3941 static int ocfs2_iterate_xattr_buckets(struct inode *inode, 3942 u64 blkno, 3943 u32 clusters, 3944 xattr_bucket_func *func, 3945 void *para) 3946 { 3947 int i, ret = 0; 3948 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3949 u32 num_buckets = clusters * bpc; 3950 struct ocfs2_xattr_bucket *bucket; 3951 3952 bucket = ocfs2_xattr_bucket_new(inode); 3953 if (!bucket) { 3954 mlog_errno(-ENOMEM); 3955 return -ENOMEM; 3956 } 3957 3958 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", 3959 clusters, (unsigned long long)blkno); 3960 3961 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 3962 ret = ocfs2_read_xattr_bucket(bucket, blkno); 3963 if (ret) { 3964 mlog_errno(ret); 3965 break; 3966 } 3967 3968 /* 3969 * The real bucket num in this series of blocks is stored 3970 * in the 1st bucket. 
3971 */ 3972 if (i == 0) 3973 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 3974 3975 mlog(0, "iterating xattr bucket %llu, first hash %u\n", 3976 (unsigned long long)blkno, 3977 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 3978 if (func) { 3979 ret = func(inode, bucket, para); 3980 if (ret && ret != -ERANGE) 3981 mlog_errno(ret); 3982 /* Fall through to bucket_relse() */ 3983 } 3984 3985 ocfs2_xattr_bucket_relse(bucket); 3986 if (ret) 3987 break; 3988 } 3989 3990 ocfs2_xattr_bucket_free(bucket); 3991 return ret; 3992 } 3993 3994 struct ocfs2_xattr_tree_list { 3995 char *buffer; 3996 size_t buffer_size; 3997 size_t result; 3998 }; 3999 4000 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 4001 struct ocfs2_xattr_header *xh, 4002 int index, 4003 int *block_off, 4004 int *new_offset) 4005 { 4006 u16 name_offset; 4007 4008 if (index < 0 || index >= le16_to_cpu(xh->xh_count)) 4009 return -EINVAL; 4010 4011 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 4012 4013 *block_off = name_offset >> sb->s_blocksize_bits; 4014 *new_offset = name_offset % sb->s_blocksize; 4015 4016 return 0; 4017 } 4018 4019 static int ocfs2_list_xattr_bucket(struct inode *inode, 4020 struct ocfs2_xattr_bucket *bucket, 4021 void *para) 4022 { 4023 int ret = 0, type; 4024 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; 4025 int i, block_off, new_offset; 4026 const char *prefix, *name; 4027 4028 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { 4029 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; 4030 type = ocfs2_xattr_get_type(entry); 4031 prefix = ocfs2_xattr_prefix(type); 4032 4033 if (prefix) { 4034 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 4035 bucket_xh(bucket), 4036 i, 4037 &block_off, 4038 &new_offset); 4039 if (ret) 4040 break; 4041 4042 name = (const char *)bucket_block(bucket, block_off) + 4043 new_offset; 4044 ret = 
ocfs2_xattr_list_entry(xl->buffer, 4045 xl->buffer_size, 4046 &xl->result, 4047 prefix, name, 4048 entry->xe_name_len); 4049 if (ret) 4050 break; 4051 } 4052 } 4053 4054 return ret; 4055 } 4056 4057 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 4058 struct buffer_head *blk_bh, 4059 xattr_tree_rec_func *rec_func, 4060 void *para) 4061 { 4062 struct ocfs2_xattr_block *xb = 4063 (struct ocfs2_xattr_block *)blk_bh->b_data; 4064 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; 4065 int ret = 0; 4066 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 4067 u64 p_blkno = 0; 4068 4069 if (!el->l_next_free_rec || !rec_func) 4070 return 0; 4071 4072 while (name_hash > 0) { 4073 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 4074 &e_cpos, &num_clusters, el); 4075 if (ret) { 4076 mlog_errno(ret); 4077 break; 4078 } 4079 4080 ret = rec_func(inode, blk_bh, p_blkno, e_cpos, 4081 num_clusters, para); 4082 if (ret) { 4083 if (ret != -ERANGE) 4084 mlog_errno(ret); 4085 break; 4086 } 4087 4088 if (e_cpos == 0) 4089 break; 4090 4091 name_hash = e_cpos - 1; 4092 } 4093 4094 return ret; 4095 4096 } 4097 4098 static int ocfs2_list_xattr_tree_rec(struct inode *inode, 4099 struct buffer_head *root_bh, 4100 u64 blkno, u32 cpos, u32 len, void *para) 4101 { 4102 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 4103 ocfs2_list_xattr_bucket, para); 4104 } 4105 4106 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 4107 struct buffer_head *blk_bh, 4108 char *buffer, 4109 size_t buffer_size) 4110 { 4111 int ret; 4112 struct ocfs2_xattr_tree_list xl = { 4113 .buffer = buffer, 4114 .buffer_size = buffer_size, 4115 .result = 0, 4116 }; 4117 4118 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 4119 ocfs2_list_xattr_tree_rec, &xl); 4120 if (ret) { 4121 mlog_errno(ret); 4122 goto out; 4123 } 4124 4125 ret = xl.result; 4126 out: 4127 return ret; 4128 } 4129 4130 static int cmp_xe(const void *a, const void *b) 4131 { 4132 const struct 
ocfs2_xattr_entry *l = a, *r = b; 4133 u32 l_hash = le32_to_cpu(l->xe_name_hash); 4134 u32 r_hash = le32_to_cpu(r->xe_name_hash); 4135 4136 if (l_hash > r_hash) 4137 return 1; 4138 if (l_hash < r_hash) 4139 return -1; 4140 return 0; 4141 } 4142 4143 static void swap_xe(void *a, void *b, int size) 4144 { 4145 struct ocfs2_xattr_entry *l = a, *r = b, tmp; 4146 4147 tmp = *l; 4148 memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); 4149 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); 4150 } 4151 4152 /* 4153 * When the ocfs2_xattr_block is filled up, new bucket will be created 4154 * and all the xattr entries will be moved to the new bucket. 4155 * The header goes at the start of the bucket, and the names+values are 4156 * filled from the end. This is why *target starts as the last buffer. 4157 * Note: we need to sort the entries since they are not saved in order 4158 * in the ocfs2_xattr_block. 4159 */ 4160 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 4161 struct buffer_head *xb_bh, 4162 struct ocfs2_xattr_bucket *bucket) 4163 { 4164 int i, blocksize = inode->i_sb->s_blocksize; 4165 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4166 u16 offset, size, off_change; 4167 struct ocfs2_xattr_entry *xe; 4168 struct ocfs2_xattr_block *xb = 4169 (struct ocfs2_xattr_block *)xb_bh->b_data; 4170 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 4171 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 4172 u16 count = le16_to_cpu(xb_xh->xh_count); 4173 char *src = xb_bh->b_data; 4174 char *target = bucket_block(bucket, blks - 1); 4175 4176 mlog(0, "cp xattr from block %llu to bucket %llu\n", 4177 (unsigned long long)xb_bh->b_blocknr, 4178 (unsigned long long)bucket_blkno(bucket)); 4179 4180 for (i = 0; i < blks; i++) 4181 memset(bucket_block(bucket, i), 0, blocksize); 4182 4183 /* 4184 * Since the xe_name_offset is based on ocfs2_xattr_header, 4185 * there is a offset change corresponding to the change of 4186 * ocfs2_xattr_header's position. 
4187 */ 4188 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4189 xe = &xb_xh->xh_entries[count - 1]; 4190 offset = le16_to_cpu(xe->xe_name_offset) + off_change; 4191 size = blocksize - offset; 4192 4193 /* copy all the names and values. */ 4194 memcpy(target + offset, src + offset, size); 4195 4196 /* Init new header now. */ 4197 xh->xh_count = xb_xh->xh_count; 4198 xh->xh_num_buckets = cpu_to_le16(1); 4199 xh->xh_name_value_len = cpu_to_le16(size); 4200 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 4201 4202 /* copy all the entries. */ 4203 target = bucket_block(bucket, 0); 4204 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 4205 size = count * sizeof(struct ocfs2_xattr_entry); 4206 memcpy(target + offset, (char *)xb_xh + offset, size); 4207 4208 /* Change the xe offset for all the xe because of the move. */ 4209 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + 4210 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4211 for (i = 0; i < count; i++) 4212 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4213 4214 mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n", 4215 offset, size, off_change); 4216 4217 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4218 cmp_xe, swap_xe); 4219 } 4220 4221 /* 4222 * After we move xattr from block to index btree, we have to 4223 * update ocfs2_xattr_search to the new xe and base. 4224 * 4225 * When the entry is in xattr block, xattr_bh indicates the storage place. 4226 * While if the entry is in index b-tree, "bucket" indicates the 4227 * real place of the xattr. 
4228 */ 4229 static void ocfs2_xattr_update_xattr_search(struct inode *inode, 4230 struct ocfs2_xattr_search *xs, 4231 struct buffer_head *old_bh) 4232 { 4233 char *buf = old_bh->b_data; 4234 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 4235 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 4236 int i; 4237 4238 xs->header = bucket_xh(xs->bucket); 4239 xs->base = bucket_block(xs->bucket, 0); 4240 xs->end = xs->base + inode->i_sb->s_blocksize; 4241 4242 if (xs->not_found) 4243 return; 4244 4245 i = xs->here - old_xh->xh_entries; 4246 xs->here = &xs->header->xh_entries[i]; 4247 } 4248 4249 static int ocfs2_xattr_create_index_block(struct inode *inode, 4250 struct ocfs2_xattr_search *xs, 4251 struct ocfs2_xattr_set_ctxt *ctxt) 4252 { 4253 int ret; 4254 u32 bit_off, len; 4255 u64 blkno; 4256 handle_t *handle = ctxt->handle; 4257 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4258 struct buffer_head *xb_bh = xs->xattr_bh; 4259 struct ocfs2_xattr_block *xb = 4260 (struct ocfs2_xattr_block *)xb_bh->b_data; 4261 struct ocfs2_xattr_tree_root *xr; 4262 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4263 4264 mlog(0, "create xattr index block for %llu\n", 4265 (unsigned long long)xb_bh->b_blocknr); 4266 4267 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4268 BUG_ON(!xs->bucket); 4269 4270 /* 4271 * XXX: 4272 * We can use this lock for now, and maybe move to a dedicated mutex 4273 * if performance becomes a problem later. 
4274 */ 4275 down_write(&oi->ip_alloc_sem); 4276 4277 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, 4278 OCFS2_JOURNAL_ACCESS_WRITE); 4279 if (ret) { 4280 mlog_errno(ret); 4281 goto out; 4282 } 4283 4284 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 4285 1, 1, &bit_off, &len); 4286 if (ret) { 4287 mlog_errno(ret); 4288 goto out; 4289 } 4290 4291 /* 4292 * The bucket may spread in many blocks, and 4293 * we will only touch the 1st block and the last block 4294 * in the whole bucket(one for entry and one for data). 4295 */ 4296 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4297 4298 mlog(0, "allocate 1 cluster from %llu to xattr block\n", 4299 (unsigned long long)blkno); 4300 4301 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno); 4302 if (ret) { 4303 mlog_errno(ret); 4304 goto out; 4305 } 4306 4307 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 4308 OCFS2_JOURNAL_ACCESS_CREATE); 4309 if (ret) { 4310 mlog_errno(ret); 4311 goto out; 4312 } 4313 4314 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); 4315 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 4316 4317 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); 4318 4319 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 4320 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 4321 offsetof(struct ocfs2_xattr_block, xb_attrs)); 4322 4323 xr = &xb->xb_attrs.xb_root; 4324 xr->xt_clusters = cpu_to_le32(1); 4325 xr->xt_last_eb_blk = 0; 4326 xr->xt_list.l_tree_depth = 0; 4327 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); 4328 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 4329 4330 xr->xt_list.l_recs[0].e_cpos = 0; 4331 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 4332 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 4333 4334 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 4335 4336 ocfs2_journal_dirty(handle, xb_bh); 4337 4338 out: 4339 up_write(&oi->ip_alloc_sem); 4340 4341 return ret; 4342 } 4343 
4344 static int cmp_xe_offset(const void *a, const void *b) 4345 { 4346 const struct ocfs2_xattr_entry *l = a, *r = b; 4347 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4348 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4349 4350 if (l_name_offset < r_name_offset) 4351 return 1; 4352 if (l_name_offset > r_name_offset) 4353 return -1; 4354 return 0; 4355 } 4356 4357 /* 4358 * defrag a xattr bucket if we find that the bucket has some 4359 * holes beteen name/value pairs. 4360 * We will move all the name/value pairs to the end of the bucket 4361 * so that we can spare some space for insertion. 4362 */ 4363 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4364 handle_t *handle, 4365 struct ocfs2_xattr_bucket *bucket) 4366 { 4367 int ret, i; 4368 size_t end, offset, len; 4369 struct ocfs2_xattr_header *xh; 4370 char *entries, *buf, *bucket_buf = NULL; 4371 u64 blkno = bucket_blkno(bucket); 4372 u16 xh_free_start; 4373 size_t blocksize = inode->i_sb->s_blocksize; 4374 struct ocfs2_xattr_entry *xe; 4375 4376 /* 4377 * In order to make the operation more efficient and generic, 4378 * we copy all the blocks into a contiguous memory and do the 4379 * defragment there, so if anything is error, we will not touch 4380 * the real block. 
4381 */ 4382 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4383 if (!bucket_buf) { 4384 ret = -EIO; 4385 goto out; 4386 } 4387 4388 buf = bucket_buf; 4389 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4390 memcpy(buf, bucket_block(bucket, i), blocksize); 4391 4392 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4393 OCFS2_JOURNAL_ACCESS_WRITE); 4394 if (ret < 0) { 4395 mlog_errno(ret); 4396 goto out; 4397 } 4398 4399 xh = (struct ocfs2_xattr_header *)bucket_buf; 4400 entries = (char *)xh->xh_entries; 4401 xh_free_start = le16_to_cpu(xh->xh_free_start); 4402 4403 mlog(0, "adjust xattr bucket in %llu, count = %u, " 4404 "xh_free_start = %u, xh_name_value_len = %u.\n", 4405 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4406 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4407 4408 /* 4409 * sort all the entries by their offset. 4410 * the largest will be the first, so that we can 4411 * move them to the end one by one. 4412 */ 4413 sort(entries, le16_to_cpu(xh->xh_count), 4414 sizeof(struct ocfs2_xattr_entry), 4415 cmp_xe_offset, swap_xe); 4416 4417 /* Move all name/values to the end of the bucket. */ 4418 xe = xh->xh_entries; 4419 end = OCFS2_XATTR_BUCKET_SIZE; 4420 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4421 offset = le16_to_cpu(xe->xe_name_offset); 4422 len = namevalue_size_xe(xe); 4423 4424 /* 4425 * We must make sure that the name/value pair 4426 * exist in the same block. So adjust end to 4427 * the previous block end if needed. 
4428 */ 4429 if (((end - len) / blocksize != 4430 (end - 1) / blocksize)) 4431 end = end - end % blocksize; 4432 4433 if (end > offset + len) { 4434 memmove(bucket_buf + end - len, 4435 bucket_buf + offset, len); 4436 xe->xe_name_offset = cpu_to_le16(end - len); 4437 } 4438 4439 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4440 "bucket %llu\n", (unsigned long long)blkno); 4441 4442 end -= len; 4443 } 4444 4445 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4446 "bucket %llu\n", (unsigned long long)blkno); 4447 4448 if (xh_free_start == end) 4449 goto out; 4450 4451 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4452 xh->xh_free_start = cpu_to_le16(end); 4453 4454 /* sort the entries by their name_hash. */ 4455 sort(entries, le16_to_cpu(xh->xh_count), 4456 sizeof(struct ocfs2_xattr_entry), 4457 cmp_xe, swap_xe); 4458 4459 buf = bucket_buf; 4460 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4461 memcpy(bucket_block(bucket, i), buf, blocksize); 4462 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4463 4464 out: 4465 kfree(bucket_buf); 4466 return ret; 4467 } 4468 4469 /* 4470 * prev_blkno points to the start of an existing extent. new_blkno 4471 * points to a newly allocated extent. Because we know each of our 4472 * clusters contains more than bucket, we can easily split one cluster 4473 * at a bucket boundary. So we take the last cluster of the existing 4474 * extent and split it down the middle. We move the last half of the 4475 * buckets in the last cluster of the existing extent over to the new 4476 * extent. 4477 * 4478 * first_bh is the buffer at prev_blkno so we can update the existing 4479 * extent's bucket count. header_bh is the bucket were we were hoping 4480 * to insert our xattr. If the bucket move places the target in the new 4481 * extent, we'll update first_bh and header_bh after modifying the old 4482 * extent. 
4483 * 4484 * first_hash will be set as the 1st xe's name_hash in the new extent. 4485 */ 4486 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 4487 handle_t *handle, 4488 struct ocfs2_xattr_bucket *first, 4489 struct ocfs2_xattr_bucket *target, 4490 u64 new_blkno, 4491 u32 num_clusters, 4492 u32 *first_hash) 4493 { 4494 int ret; 4495 struct super_block *sb = inode->i_sb; 4496 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); 4497 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); 4498 int to_move = num_buckets / 2; 4499 u64 src_blkno; 4500 u64 last_cluster_blkno = bucket_blkno(first) + 4501 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); 4502 4503 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4504 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4505 4506 mlog(0, "move half of xattrs in cluster %llu to %llu\n", 4507 (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno); 4508 4509 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4510 last_cluster_blkno, new_blkno, 4511 to_move, first_hash); 4512 if (ret) { 4513 mlog_errno(ret); 4514 goto out; 4515 } 4516 4517 /* This is the first bucket that got moved */ 4518 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); 4519 4520 /* 4521 * If the target bucket was part of the moved buckets, we need to 4522 * update first and target. 4523 */ 4524 if (bucket_blkno(target) >= src_blkno) { 4525 /* Find the block for the new target bucket */ 4526 src_blkno = new_blkno + 4527 (bucket_blkno(target) - src_blkno); 4528 4529 ocfs2_xattr_bucket_relse(first); 4530 ocfs2_xattr_bucket_relse(target); 4531 4532 /* 4533 * These shouldn't fail - the buffers are in the 4534 * journal from ocfs2_cp_xattr_bucket(). 
4535 */ 4536 ret = ocfs2_read_xattr_bucket(first, new_blkno); 4537 if (ret) { 4538 mlog_errno(ret); 4539 goto out; 4540 } 4541 ret = ocfs2_read_xattr_bucket(target, src_blkno); 4542 if (ret) 4543 mlog_errno(ret); 4544 4545 } 4546 4547 out: 4548 return ret; 4549 } 4550 4551 /* 4552 * Find the suitable pos when we divide a bucket into 2. 4553 * We have to make sure the xattrs with the same hash value exist 4554 * in the same bucket. 4555 * 4556 * If this ocfs2_xattr_header covers more than one hash value, find a 4557 * place where the hash value changes. Try to find the most even split. 4558 * The most common case is that all entries have different hash values, 4559 * and the first check we make will find a place to split. 4560 */ 4561 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) 4562 { 4563 struct ocfs2_xattr_entry *entries = xh->xh_entries; 4564 int count = le16_to_cpu(xh->xh_count); 4565 int delta, middle = count / 2; 4566 4567 /* 4568 * We start at the middle. Each step gets farther away in both 4569 * directions. We therefore hit the change in hash value 4570 * nearest to the middle. Note that this loop does not execute for 4571 * count < 2. 4572 */ 4573 for (delta = 0; delta < middle; delta++) { 4574 /* Let's check delta earlier than middle */ 4575 if (cmp_xe(&entries[middle - delta - 1], 4576 &entries[middle - delta])) 4577 return middle - delta; 4578 4579 /* For even counts, don't walk off the end */ 4580 if ((middle + delta + 1) == count) 4581 continue; 4582 4583 /* Now try delta past middle */ 4584 if (cmp_xe(&entries[middle + delta], 4585 &entries[middle + delta + 1])) 4586 return middle + delta + 1; 4587 } 4588 4589 /* Every entry had the same hash */ 4590 return count; 4591 } 4592 4593 /* 4594 * Move some xattrs in old bucket(blk) to new bucket(new_blk). 4595 * first_hash will record the 1st hash of the new bucket. 4596 * 4597 * Normally half of the xattrs will be moved. 
But we have to make 4598 * sure that the xattrs with the same hash value are stored in the 4599 * same bucket. If all the xattrs in this bucket have the same hash 4600 * value, the new bucket will be initialized as an empty one and the 4601 * first_hash will be initialized as (hash_value+1). 4602 */ 4603 static int ocfs2_divide_xattr_bucket(struct inode *inode, 4604 handle_t *handle, 4605 u64 blk, 4606 u64 new_blk, 4607 u32 *first_hash, 4608 int new_bucket_head) 4609 { 4610 int ret, i; 4611 int count, start, len, name_value_len = 0, name_offset = 0; 4612 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4613 struct ocfs2_xattr_header *xh; 4614 struct ocfs2_xattr_entry *xe; 4615 int blocksize = inode->i_sb->s_blocksize; 4616 4617 mlog(0, "move some of xattrs from bucket %llu to %llu\n", 4618 (unsigned long long)blk, (unsigned long long)new_blk); 4619 4620 s_bucket = ocfs2_xattr_bucket_new(inode); 4621 t_bucket = ocfs2_xattr_bucket_new(inode); 4622 if (!s_bucket || !t_bucket) { 4623 ret = -ENOMEM; 4624 mlog_errno(ret); 4625 goto out; 4626 } 4627 4628 ret = ocfs2_read_xattr_bucket(s_bucket, blk); 4629 if (ret) { 4630 mlog_errno(ret); 4631 goto out; 4632 } 4633 4634 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, 4635 OCFS2_JOURNAL_ACCESS_WRITE); 4636 if (ret) { 4637 mlog_errno(ret); 4638 goto out; 4639 } 4640 4641 /* 4642 * Even if !new_bucket_head, we're overwriting t_bucket. Thus, 4643 * there's no need to read it. 4644 */ 4645 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk); 4646 if (ret) { 4647 mlog_errno(ret); 4648 goto out; 4649 } 4650 4651 /* 4652 * Hey, if we're overwriting t_bucket, what difference does 4653 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the 4654 * same part of ocfs2_cp_xattr_bucket(). 4655 */ 4656 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4657 new_bucket_head ? 
4658 OCFS2_JOURNAL_ACCESS_CREATE : 4659 OCFS2_JOURNAL_ACCESS_WRITE); 4660 if (ret) { 4661 mlog_errno(ret); 4662 goto out; 4663 } 4664 4665 xh = bucket_xh(s_bucket); 4666 count = le16_to_cpu(xh->xh_count); 4667 start = ocfs2_xattr_find_divide_pos(xh); 4668 4669 if (start == count) { 4670 xe = &xh->xh_entries[start-1]; 4671 4672 /* 4673 * initialized a new empty bucket here. 4674 * The hash value is set as one larger than 4675 * that of the last entry in the previous bucket. 4676 */ 4677 for (i = 0; i < t_bucket->bu_blocks; i++) 4678 memset(bucket_block(t_bucket, i), 0, blocksize); 4679 4680 xh = bucket_xh(t_bucket); 4681 xh->xh_free_start = cpu_to_le16(blocksize); 4682 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 4683 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 4684 4685 goto set_num_buckets; 4686 } 4687 4688 /* copy the whole bucket to the new first. */ 4689 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4690 4691 /* update the new bucket. */ 4692 xh = bucket_xh(t_bucket); 4693 4694 /* 4695 * Calculate the total name/value len and xh_free_start for 4696 * the old bucket first. 4697 */ 4698 name_offset = OCFS2_XATTR_BUCKET_SIZE; 4699 name_value_len = 0; 4700 for (i = 0; i < start; i++) { 4701 xe = &xh->xh_entries[i]; 4702 name_value_len += namevalue_size_xe(xe); 4703 if (le16_to_cpu(xe->xe_name_offset) < name_offset) 4704 name_offset = le16_to_cpu(xe->xe_name_offset); 4705 } 4706 4707 /* 4708 * Now begin the modification to the new bucket. 4709 * 4710 * In the new bucket, We just move the xattr entry to the beginning 4711 * and don't touch the name/value. So there will be some holes in the 4712 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is 4713 * called. 
4714 */ 4715 xe = &xh->xh_entries[start]; 4716 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4717 mlog(0, "mv xattr entry len %d from %d to %d\n", len, 4718 (int)((char *)xe - (char *)xh), 4719 (int)((char *)xh->xh_entries - (char *)xh)); 4720 memmove((char *)xh->xh_entries, (char *)xe, len); 4721 xe = &xh->xh_entries[count - start]; 4722 len = sizeof(struct ocfs2_xattr_entry) * start; 4723 memset((char *)xe, 0, len); 4724 4725 le16_add_cpu(&xh->xh_count, -start); 4726 le16_add_cpu(&xh->xh_name_value_len, -name_value_len); 4727 4728 /* Calculate xh_free_start for the new bucket. */ 4729 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 4730 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 4731 xe = &xh->xh_entries[i]; 4732 if (le16_to_cpu(xe->xe_name_offset) < 4733 le16_to_cpu(xh->xh_free_start)) 4734 xh->xh_free_start = xe->xe_name_offset; 4735 } 4736 4737 set_num_buckets: 4738 /* set xh->xh_num_buckets for the new xh. */ 4739 if (new_bucket_head) 4740 xh->xh_num_buckets = cpu_to_le16(1); 4741 else 4742 xh->xh_num_buckets = 0; 4743 4744 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4745 4746 /* store the first_hash of the new bucket. */ 4747 if (first_hash) 4748 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4749 4750 /* 4751 * Now only update the 1st block of the old bucket. If we 4752 * just added a new empty bucket, there is no need to modify 4753 * it. 
4754 */ 4755 if (start == count) 4756 goto out; 4757 4758 xh = bucket_xh(s_bucket); 4759 memset(&xh->xh_entries[start], 0, 4760 sizeof(struct ocfs2_xattr_entry) * (count - start)); 4761 xh->xh_count = cpu_to_le16(start); 4762 xh->xh_free_start = cpu_to_le16(name_offset); 4763 xh->xh_name_value_len = cpu_to_le16(name_value_len); 4764 4765 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); 4766 4767 out: 4768 ocfs2_xattr_bucket_free(s_bucket); 4769 ocfs2_xattr_bucket_free(t_bucket); 4770 4771 return ret; 4772 } 4773 4774 /* 4775 * Copy xattr from one bucket to another bucket. 4776 * 4777 * The caller must make sure that the journal transaction 4778 * has enough space for journaling. 4779 */ 4780 static int ocfs2_cp_xattr_bucket(struct inode *inode, 4781 handle_t *handle, 4782 u64 s_blkno, 4783 u64 t_blkno, 4784 int t_is_new) 4785 { 4786 int ret; 4787 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4788 4789 BUG_ON(s_blkno == t_blkno); 4790 4791 mlog(0, "cp bucket %llu to %llu, target is %d\n", 4792 (unsigned long long)s_blkno, (unsigned long long)t_blkno, 4793 t_is_new); 4794 4795 s_bucket = ocfs2_xattr_bucket_new(inode); 4796 t_bucket = ocfs2_xattr_bucket_new(inode); 4797 if (!s_bucket || !t_bucket) { 4798 ret = -ENOMEM; 4799 mlog_errno(ret); 4800 goto out; 4801 } 4802 4803 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); 4804 if (ret) 4805 goto out; 4806 4807 /* 4808 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4809 * there's no need to read it. 4810 */ 4811 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno); 4812 if (ret) 4813 goto out; 4814 4815 /* 4816 * Hey, if we're overwriting t_bucket, what difference does 4817 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new 4818 * cluster to fill, we came here from 4819 * ocfs2_mv_xattr_buckets(), and it is really new - 4820 * ACCESS_CREATE is required. But we also might have moved data 4821 * out of t_bucket before extending back into it. 
4822 * ocfs2_add_new_xattr_bucket() can do this - its call to 4823 * ocfs2_add_new_xattr_cluster() may have created a new extent 4824 * and copied out the end of the old extent. Then it re-extends 4825 * the old extent back to create space for new xattrs. That's 4826 * how we get here, and the bucket isn't really new. 4827 */ 4828 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4829 t_is_new ? 4830 OCFS2_JOURNAL_ACCESS_CREATE : 4831 OCFS2_JOURNAL_ACCESS_WRITE); 4832 if (ret) 4833 goto out; 4834 4835 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4836 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4837 4838 out: 4839 ocfs2_xattr_bucket_free(t_bucket); 4840 ocfs2_xattr_bucket_free(s_bucket); 4841 4842 return ret; 4843 } 4844 4845 /* 4846 * src_blk points to the start of an existing extent. last_blk points to 4847 * last cluster in that extent. to_blk points to a newly allocated 4848 * extent. We copy the buckets from the cluster at last_blk to the new 4849 * extent. If start_bucket is non-zero, we skip that many buckets before 4850 * we start copying. The new extent's xh_num_buckets gets set to the 4851 * number of buckets we copied. The old extent's xh_num_buckets shrinks 4852 * by the same amount. 
4853 */ 4854 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 4855 u64 src_blk, u64 last_blk, u64 to_blk, 4856 unsigned int start_bucket, 4857 u32 *first_hash) 4858 { 4859 int i, ret, credits; 4860 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4861 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4862 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4863 struct ocfs2_xattr_bucket *old_first, *new_first; 4864 4865 mlog(0, "mv xattrs from cluster %llu to %llu\n", 4866 (unsigned long long)last_blk, (unsigned long long)to_blk); 4867 4868 BUG_ON(start_bucket >= num_buckets); 4869 if (start_bucket) { 4870 num_buckets -= start_bucket; 4871 last_blk += (start_bucket * blks_per_bucket); 4872 } 4873 4874 /* The first bucket of the original extent */ 4875 old_first = ocfs2_xattr_bucket_new(inode); 4876 /* The first bucket of the new extent */ 4877 new_first = ocfs2_xattr_bucket_new(inode); 4878 if (!old_first || !new_first) { 4879 ret = -ENOMEM; 4880 mlog_errno(ret); 4881 goto out; 4882 } 4883 4884 ret = ocfs2_read_xattr_bucket(old_first, src_blk); 4885 if (ret) { 4886 mlog_errno(ret); 4887 goto out; 4888 } 4889 4890 /* 4891 * We need to update the first bucket of the old extent and all 4892 * the buckets going to the new extent. 
4893 */ 4894 credits = ((num_buckets + 1) * blks_per_bucket); 4895 ret = ocfs2_extend_trans(handle, credits); 4896 if (ret) { 4897 mlog_errno(ret); 4898 goto out; 4899 } 4900 4901 ret = ocfs2_xattr_bucket_journal_access(handle, old_first, 4902 OCFS2_JOURNAL_ACCESS_WRITE); 4903 if (ret) { 4904 mlog_errno(ret); 4905 goto out; 4906 } 4907 4908 for (i = 0; i < num_buckets; i++) { 4909 ret = ocfs2_cp_xattr_bucket(inode, handle, 4910 last_blk + (i * blks_per_bucket), 4911 to_blk + (i * blks_per_bucket), 4912 1); 4913 if (ret) { 4914 mlog_errno(ret); 4915 goto out; 4916 } 4917 } 4918 4919 /* 4920 * Get the new bucket ready before we dirty anything 4921 * (This actually shouldn't fail, because we already dirtied 4922 * it once in ocfs2_cp_xattr_bucket()). 4923 */ 4924 ret = ocfs2_read_xattr_bucket(new_first, to_blk); 4925 if (ret) { 4926 mlog_errno(ret); 4927 goto out; 4928 } 4929 ret = ocfs2_xattr_bucket_journal_access(handle, new_first, 4930 OCFS2_JOURNAL_ACCESS_WRITE); 4931 if (ret) { 4932 mlog_errno(ret); 4933 goto out; 4934 } 4935 4936 /* Now update the headers */ 4937 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); 4938 ocfs2_xattr_bucket_journal_dirty(handle, old_first); 4939 4940 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); 4941 ocfs2_xattr_bucket_journal_dirty(handle, new_first); 4942 4943 if (first_hash) 4944 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); 4945 4946 out: 4947 ocfs2_xattr_bucket_free(new_first); 4948 ocfs2_xattr_bucket_free(old_first); 4949 return ret; 4950 } 4951 4952 /* 4953 * Move some xattrs in this cluster to the new cluster. 4954 * This function should only be called when bucket size == cluster size. 4955 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 
4956 */ 4957 static int ocfs2_divide_xattr_cluster(struct inode *inode, 4958 handle_t *handle, 4959 u64 prev_blk, 4960 u64 new_blk, 4961 u32 *first_hash) 4962 { 4963 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4964 int ret, credits = 2 * blk_per_bucket; 4965 4966 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4967 4968 ret = ocfs2_extend_trans(handle, credits); 4969 if (ret) { 4970 mlog_errno(ret); 4971 return ret; 4972 } 4973 4974 /* Move half of the xattr in start_blk to the next bucket. */ 4975 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, 4976 new_blk, first_hash, 1); 4977 } 4978 4979 /* 4980 * Move some xattrs from the old cluster to the new one since they are not 4981 * contiguous in ocfs2 xattr tree. 4982 * 4983 * new_blk starts a new separate cluster, and we will move some xattrs from 4984 * prev_blk to it. v_start will be set as the first name hash value in this 4985 * new cluster so that it can be used as e_cpos during tree insertion and 4986 * don't collide with our original b-tree operations. first_bh and header_bh 4987 * will also be updated since they will be used in ocfs2_extend_xattr_bucket 4988 * to extend the insert bucket. 4989 * 4990 * The problem is how much xattr should we move to the new one and when should 4991 * we update first_bh and header_bh? 4992 * 1. If cluster size > bucket size, that means the previous cluster has more 4993 * than 1 bucket, so just move half nums of bucket into the new cluster and 4994 * update the first_bh and header_bh if the insert bucket has been moved 4995 * to the new cluster. 4996 * 2. If cluster_size == bucket_size: 4997 * a) If the previous extent rec has more than one cluster and the insert 4998 * place isn't in the last cluster, copy the entire last cluster to the 4999 * new one. This time, we don't need to upate the first_bh and header_bh 5000 * since they will not be moved into the new cluster. 
5001 * b) Otherwise, move the bottom half of the xattrs in the last cluster into 5002 * the new one. And we set the extend flag to zero if the insert place is 5003 * moved into the new allocated cluster since no extend is needed. 5004 */ 5005 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 5006 handle_t *handle, 5007 struct ocfs2_xattr_bucket *first, 5008 struct ocfs2_xattr_bucket *target, 5009 u64 new_blk, 5010 u32 prev_clusters, 5011 u32 *v_start, 5012 int *extend) 5013 { 5014 int ret; 5015 5016 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", 5017 (unsigned long long)bucket_blkno(first), prev_clusters, 5018 (unsigned long long)new_blk); 5019 5020 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5021 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5022 handle, 5023 first, target, 5024 new_blk, 5025 prev_clusters, 5026 v_start); 5027 if (ret) 5028 mlog_errno(ret); 5029 } else { 5030 /* The start of the last cluster in the first extent */ 5031 u64 last_blk = bucket_blkno(first) + 5032 ((prev_clusters - 1) * 5033 ocfs2_clusters_to_blocks(inode->i_sb, 1)); 5034 5035 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { 5036 ret = ocfs2_mv_xattr_buckets(inode, handle, 5037 bucket_blkno(first), 5038 last_blk, new_blk, 0, 5039 v_start); 5040 if (ret) 5041 mlog_errno(ret); 5042 } else { 5043 ret = ocfs2_divide_xattr_cluster(inode, handle, 5044 last_blk, new_blk, 5045 v_start); 5046 if (ret) 5047 mlog_errno(ret); 5048 5049 if ((bucket_blkno(target) == last_blk) && extend) 5050 *extend = 0; 5051 } 5052 } 5053 5054 return ret; 5055 } 5056 5057 /* 5058 * Add a new cluster for xattr storage. 5059 * 5060 * If the new cluster is contiguous with the previous one, it will be 5061 * appended to the same extent record, and num_clusters will be updated. 5062 * If not, we will insert a new extent for it and move some xattrs in 5063 * the last cluster into the new allocated one. 
5064 * We also need to limit the maximum size of a btree leaf, otherwise we'll 5065 * lose the benefits of hashing because we'll have to search large leaves. 5066 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, 5067 * if it's bigger). 5068 * 5069 * first_bh is the first block of the previous extent rec and header_bh 5070 * indicates the bucket we will insert the new xattrs. They will be updated 5071 * when the header_bh is moved into the new cluster. 5072 */ 5073 static int ocfs2_add_new_xattr_cluster(struct inode *inode, 5074 struct buffer_head *root_bh, 5075 struct ocfs2_xattr_bucket *first, 5076 struct ocfs2_xattr_bucket *target, 5077 u32 *num_clusters, 5078 u32 prev_cpos, 5079 int *extend, 5080 struct ocfs2_xattr_set_ctxt *ctxt) 5081 { 5082 int ret; 5083 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 5084 u32 prev_clusters = *num_clusters; 5085 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 5086 u64 block; 5087 handle_t *handle = ctxt->handle; 5088 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5089 struct ocfs2_extent_tree et; 5090 5091 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, " 5092 "previous xattr blkno = %llu\n", 5093 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5094 prev_cpos, (unsigned long long)bucket_blkno(first)); 5095 5096 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5097 5098 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5099 OCFS2_JOURNAL_ACCESS_WRITE); 5100 if (ret < 0) { 5101 mlog_errno(ret); 5102 goto leave; 5103 } 5104 5105 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1, 5106 clusters_to_add, &bit_off, &num_bits); 5107 if (ret < 0) { 5108 if (ret != -ENOSPC) 5109 mlog_errno(ret); 5110 goto leave; 5111 } 5112 5113 BUG_ON(num_bits > clusters_to_add); 5114 5115 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5116 mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n", 5117 num_bits, bit_off, (unsigned long 
long)OCFS2_I(inode)->ip_blkno); 5118 5119 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5120 (prev_clusters + num_bits) << osb->s_clustersize_bits <= 5121 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 5122 /* 5123 * If this cluster is contiguous with the old one and 5124 * adding this new cluster, we don't surpass the limit of 5125 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be 5126 * initialized and used like other buckets in the previous 5127 * cluster. 5128 * So add it as a contiguous one. The caller will handle 5129 * its init process. 5130 */ 5131 v_start = prev_cpos + prev_clusters; 5132 *num_clusters = prev_clusters + num_bits; 5133 mlog(0, "Add contiguous %u clusters to previous extent rec.\n", 5134 num_bits); 5135 } else { 5136 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5137 handle, 5138 first, 5139 target, 5140 block, 5141 prev_clusters, 5142 &v_start, 5143 extend); 5144 if (ret) { 5145 mlog_errno(ret); 5146 goto leave; 5147 } 5148 } 5149 5150 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", 5151 num_bits, (unsigned long long)block, v_start); 5152 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5153 num_bits, 0, ctxt->meta_ac); 5154 if (ret < 0) { 5155 mlog_errno(ret); 5156 goto leave; 5157 } 5158 5159 ocfs2_journal_dirty(handle, root_bh); 5160 5161 leave: 5162 return ret; 5163 } 5164 5165 /* 5166 * We are given an extent. 'first' is the bucket at the very front of 5167 * the extent. The extent has space for an additional bucket past 5168 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number 5169 * of the target bucket. We wish to shift every bucket past the target 5170 * down one, filling in that additional space. When we get back to the 5171 * target, we split the target between itself and the now-empty bucket 5172 * at target+1 (aka, target_blkno + blks_per_bucket). 
5173 */ 5174 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5175 handle_t *handle, 5176 struct ocfs2_xattr_bucket *first, 5177 u64 target_blk, 5178 u32 num_clusters) 5179 { 5180 int ret, credits; 5181 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5182 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5183 u64 end_blk; 5184 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5185 5186 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting " 5187 "from %llu, len = %u\n", (unsigned long long)target_blk, 5188 (unsigned long long)bucket_blkno(first), num_clusters); 5189 5190 /* The extent must have room for an additional bucket */ 5191 BUG_ON(new_bucket >= 5192 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5193 5194 /* end_blk points to the last existing bucket */ 5195 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5196 5197 /* 5198 * end_blk is the start of the last existing bucket. 5199 * Thus, (end_blk - target_blk) covers the target bucket and 5200 * every bucket after it up to, but not including, the last 5201 * existing bucket. Then we add the last existing bucket, the 5202 * new bucket, and the first bucket (3 * blk_per_bucket). 5203 */ 5204 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5205 ret = ocfs2_extend_trans(handle, credits); 5206 if (ret) { 5207 mlog_errno(ret); 5208 goto out; 5209 } 5210 5211 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5212 OCFS2_JOURNAL_ACCESS_WRITE); 5213 if (ret) { 5214 mlog_errno(ret); 5215 goto out; 5216 } 5217 5218 while (end_blk != target_blk) { 5219 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5220 end_blk + blk_per_bucket, 0); 5221 if (ret) 5222 goto out; 5223 end_blk -= blk_per_bucket; 5224 } 5225 5226 /* Move half of the xattr in target_blkno to the next bucket. 
*/ 5227 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5228 target_blk + blk_per_bucket, NULL, 0); 5229 5230 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5231 ocfs2_xattr_bucket_journal_dirty(handle, first); 5232 5233 out: 5234 return ret; 5235 } 5236 5237 /* 5238 * Add new xattr bucket in an extent record and adjust the buckets 5239 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5240 * bucket we want to insert into. 5241 * 5242 * In the easy case, we will move all the buckets after target down by 5243 * one. Half of target's xattrs will be moved to the next bucket. 5244 * 5245 * If current cluster is full, we'll allocate a new one. This may not 5246 * be contiguous. The underlying calls will make sure that there is 5247 * space for the insert, shifting buckets around if necessary. 5248 * 'target' may be moved by those calls. 5249 */ 5250 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5251 struct buffer_head *xb_bh, 5252 struct ocfs2_xattr_bucket *target, 5253 struct ocfs2_xattr_set_ctxt *ctxt) 5254 { 5255 struct ocfs2_xattr_block *xb = 5256 (struct ocfs2_xattr_block *)xb_bh->b_data; 5257 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5258 struct ocfs2_extent_list *el = &xb_root->xt_list; 5259 u32 name_hash = 5260 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5261 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5262 int ret, num_buckets, extend = 1; 5263 u64 p_blkno; 5264 u32 e_cpos, num_clusters; 5265 /* The bucket at the front of the extent */ 5266 struct ocfs2_xattr_bucket *first; 5267 5268 mlog(0, "Add new xattr bucket starting from %llu\n", 5269 (unsigned long long)bucket_blkno(target)); 5270 5271 /* The first bucket of the original extent */ 5272 first = ocfs2_xattr_bucket_new(inode); 5273 if (!first) { 5274 ret = -ENOMEM; 5275 mlog_errno(ret); 5276 goto out; 5277 } 5278 5279 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5280 &num_clusters, el); 5281 if (ret) { 
5282 mlog_errno(ret); 5283 goto out; 5284 } 5285 5286 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5287 if (ret) { 5288 mlog_errno(ret); 5289 goto out; 5290 } 5291 5292 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5293 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5294 /* 5295 * This can move first+target if the target bucket moves 5296 * to the new extent. 5297 */ 5298 ret = ocfs2_add_new_xattr_cluster(inode, 5299 xb_bh, 5300 first, 5301 target, 5302 &num_clusters, 5303 e_cpos, 5304 &extend, 5305 ctxt); 5306 if (ret) { 5307 mlog_errno(ret); 5308 goto out; 5309 } 5310 } 5311 5312 if (extend) { 5313 ret = ocfs2_extend_xattr_bucket(inode, 5314 ctxt->handle, 5315 first, 5316 bucket_blkno(target), 5317 num_clusters); 5318 if (ret) 5319 mlog_errno(ret); 5320 } 5321 5322 out: 5323 ocfs2_xattr_bucket_free(first); 5324 5325 return ret; 5326 } 5327 5328 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode, 5329 struct ocfs2_xattr_bucket *bucket, 5330 int offs) 5331 { 5332 int block_off = offs >> inode->i_sb->s_blocksize_bits; 5333 5334 offs = offs % inode->i_sb->s_blocksize; 5335 return bucket_block(bucket, block_off) + offs; 5336 } 5337 5338 /* 5339 * Truncate the specified xe_off entry in xattr bucket. 5340 * bucket is indicated by header_bh and len is the new length. 5341 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5342 * 5343 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 
5344 */ 5345 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5346 struct ocfs2_xattr_bucket *bucket, 5347 int xe_off, 5348 int len, 5349 struct ocfs2_xattr_set_ctxt *ctxt) 5350 { 5351 int ret, offset; 5352 u64 value_blk; 5353 struct ocfs2_xattr_entry *xe; 5354 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5355 size_t blocksize = inode->i_sb->s_blocksize; 5356 struct ocfs2_xattr_value_buf vb = { 5357 .vb_access = ocfs2_journal_access, 5358 }; 5359 5360 xe = &xh->xh_entries[xe_off]; 5361 5362 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5363 5364 offset = le16_to_cpu(xe->xe_name_offset) + 5365 OCFS2_XATTR_SIZE(xe->xe_name_len); 5366 5367 value_blk = offset / blocksize; 5368 5369 /* We don't allow ocfs2_xattr_value to be stored in different block. */ 5370 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5371 5372 vb.vb_bh = bucket->bu_bhs[value_blk]; 5373 BUG_ON(!vb.vb_bh); 5374 5375 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5376 (vb.vb_bh->b_data + offset % blocksize); 5377 5378 /* 5379 * From here on out we have to dirty the bucket. The generic 5380 * value calls only modify one of the bucket's bhs, but we need 5381 * to send the bucket at once. So if they error, they *could* have 5382 * modified something. We have to assume they did, and dirty 5383 * the whole bucket. This leaves us in a consistent state. 
5384 */ 5385 mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n", 5386 xe_off, (unsigned long long)bucket_blkno(bucket), len); 5387 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5388 if (ret) { 5389 mlog_errno(ret); 5390 goto out; 5391 } 5392 5393 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5394 OCFS2_JOURNAL_ACCESS_WRITE); 5395 if (ret) { 5396 mlog_errno(ret); 5397 goto out; 5398 } 5399 5400 xe->xe_value_size = cpu_to_le64(len); 5401 5402 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5403 5404 out: 5405 return ret; 5406 } 5407 5408 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5409 struct buffer_head *root_bh, 5410 u64 blkno, 5411 u32 cpos, 5412 u32 len, 5413 void *para) 5414 { 5415 int ret; 5416 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5417 struct inode *tl_inode = osb->osb_tl_inode; 5418 handle_t *handle; 5419 struct ocfs2_xattr_block *xb = 5420 (struct ocfs2_xattr_block *)root_bh->b_data; 5421 struct ocfs2_alloc_context *meta_ac = NULL; 5422 struct ocfs2_cached_dealloc_ctxt dealloc; 5423 struct ocfs2_extent_tree et; 5424 5425 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5426 ocfs2_delete_xattr_in_bucket, para); 5427 if (ret) { 5428 mlog_errno(ret); 5429 return ret; 5430 } 5431 5432 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5433 5434 ocfs2_init_dealloc_ctxt(&dealloc); 5435 5436 mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n", 5437 cpos, len, (unsigned long long)blkno); 5438 5439 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5440 len); 5441 5442 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5443 if (ret) { 5444 mlog_errno(ret); 5445 return ret; 5446 } 5447 5448 mutex_lock(&tl_inode->i_mutex); 5449 5450 if (ocfs2_truncate_log_needs_flush(osb)) { 5451 ret = __ocfs2_flush_truncate_log(osb); 5452 if (ret < 0) { 5453 mlog_errno(ret); 5454 goto out; 5455 } 5456 } 5457 5458 handle = ocfs2_start_trans(osb, 
ocfs2_remove_extent_credits(osb->sb)); 5459 if (IS_ERR(handle)) { 5460 ret = -ENOMEM; 5461 mlog_errno(ret); 5462 goto out; 5463 } 5464 5465 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5466 OCFS2_JOURNAL_ACCESS_WRITE); 5467 if (ret) { 5468 mlog_errno(ret); 5469 goto out_commit; 5470 } 5471 5472 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5473 &dealloc); 5474 if (ret) { 5475 mlog_errno(ret); 5476 goto out_commit; 5477 } 5478 5479 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5480 ocfs2_journal_dirty(handle, root_bh); 5481 5482 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5483 if (ret) 5484 mlog_errno(ret); 5485 5486 out_commit: 5487 ocfs2_commit_trans(osb, handle); 5488 out: 5489 ocfs2_schedule_truncate_log_flush(osb, 1); 5490 5491 mutex_unlock(&tl_inode->i_mutex); 5492 5493 if (meta_ac) 5494 ocfs2_free_alloc_context(meta_ac); 5495 5496 ocfs2_run_deallocs(osb, &dealloc); 5497 5498 return ret; 5499 } 5500 5501 /* 5502 * check whether the xattr bucket is filled up with the same hash value. 5503 * If we want to insert the xattr with the same hash, return -ENOSPC. 5504 * If we want to insert a xattr with different hash value, go ahead 5505 * and ocfs2_divide_xattr_bucket will handle this. 
5506 */ 5507 static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 5508 struct ocfs2_xattr_bucket *bucket, 5509 const char *name) 5510 { 5511 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5512 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5513 5514 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5515 return 0; 5516 5517 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 5518 xh->xh_entries[0].xe_name_hash) { 5519 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5520 "hash = %u\n", 5521 (unsigned long long)bucket_blkno(bucket), 5522 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5523 return -ENOSPC; 5524 } 5525 5526 return 0; 5527 } 5528 5529 /* 5530 * Try to set the entry in the current bucket. If we fail, the caller 5531 * will handle getting us another bucket. 5532 */ 5533 static int ocfs2_xattr_set_entry_bucket(struct inode *inode, 5534 struct ocfs2_xattr_info *xi, 5535 struct ocfs2_xattr_search *xs, 5536 struct ocfs2_xattr_set_ctxt *ctxt) 5537 { 5538 int ret; 5539 struct ocfs2_xa_loc loc; 5540 5541 mlog_entry("Set xattr %s in xattr bucket\n", xi->xi_name); 5542 5543 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5544 xs->not_found ? NULL : xs->here); 5545 ret = ocfs2_xa_set(&loc, xi, ctxt); 5546 if (!ret) { 5547 xs->here = loc.xl_entry; 5548 goto out; 5549 } 5550 if (ret != -ENOSPC) { 5551 mlog_errno(ret); 5552 goto out; 5553 } 5554 5555 /* Ok, we need space. Let's try defragmenting the bucket. 
*/ 5556 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5557 xs->bucket); 5558 if (ret) { 5559 mlog_errno(ret); 5560 goto out; 5561 } 5562 5563 ret = ocfs2_xa_set(&loc, xi, ctxt); 5564 if (!ret) { 5565 xs->here = loc.xl_entry; 5566 goto out; 5567 } 5568 if (ret != -ENOSPC) 5569 mlog_errno(ret); 5570 5571 5572 out: 5573 mlog_exit(ret); 5574 return ret; 5575 } 5576 5577 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5578 struct ocfs2_xattr_info *xi, 5579 struct ocfs2_xattr_search *xs, 5580 struct ocfs2_xattr_set_ctxt *ctxt) 5581 { 5582 int ret; 5583 5584 mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name); 5585 5586 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5587 if (!ret) 5588 goto out; 5589 if (ret != -ENOSPC) { 5590 mlog_errno(ret); 5591 goto out; 5592 } 5593 5594 /* Ack, need more space. Let's try to get another bucket! */ 5595 5596 /* 5597 * We do not allow for overlapping ranges between buckets. And 5598 * the maximum number of collisions we will allow for then is 5599 * one bucket's worth, so check it here whether we need to 5600 * add a new bucket for the insert. 5601 */ 5602 ret = ocfs2_check_xattr_bucket_collision(inode, 5603 xs->bucket, 5604 xi->xi_name); 5605 if (ret) { 5606 mlog_errno(ret); 5607 goto out; 5608 } 5609 5610 ret = ocfs2_add_new_xattr_bucket(inode, 5611 xs->xattr_bh, 5612 xs->bucket, 5613 ctxt); 5614 if (ret) { 5615 mlog_errno(ret); 5616 goto out; 5617 } 5618 5619 /* 5620 * ocfs2_add_new_xattr_bucket() will have updated 5621 * xs->bucket if it moved, but it will not have updated 5622 * any of the other search fields. Thus, we drop it and 5623 * re-search. Everything should be cached, so it'll be 5624 * quick. 
5625 */ 5626 ocfs2_xattr_bucket_relse(xs->bucket); 5627 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5628 xi->xi_name_index, 5629 xi->xi_name, xs); 5630 if (ret && ret != -ENODATA) 5631 goto out; 5632 xs->not_found = ret; 5633 5634 /* Ok, we have a new bucket, let's try again */ 5635 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5636 if (ret && (ret != -ENOSPC)) 5637 mlog_errno(ret); 5638 5639 out: 5640 mlog_exit(ret); 5641 return ret; 5642 } 5643 5644 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5645 struct ocfs2_xattr_bucket *bucket, 5646 void *para) 5647 { 5648 int ret = 0, ref_credits; 5649 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5650 u16 i; 5651 struct ocfs2_xattr_entry *xe; 5652 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5653 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5654 int credits = ocfs2_remove_extent_credits(osb->sb) + 5655 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5656 struct ocfs2_xattr_value_root *xv; 5657 struct ocfs2_rm_xattr_bucket_para *args = 5658 (struct ocfs2_rm_xattr_bucket_para *)para; 5659 5660 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5661 5662 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5663 xe = &xh->xh_entries[i]; 5664 if (ocfs2_xattr_is_local(xe)) 5665 continue; 5666 5667 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5668 i, &xv, NULL); 5669 5670 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5671 args->ref_ci, 5672 args->ref_root_bh, 5673 &ctxt.meta_ac, 5674 &ref_credits); 5675 5676 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5677 if (IS_ERR(ctxt.handle)) { 5678 ret = PTR_ERR(ctxt.handle); 5679 mlog_errno(ret); 5680 break; 5681 } 5682 5683 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5684 i, 0, &ctxt); 5685 5686 ocfs2_commit_trans(osb, ctxt.handle); 5687 if (ctxt.meta_ac) { 5688 ocfs2_free_alloc_context(ctxt.meta_ac); 5689 ctxt.meta_ac = NULL; 5690 } 5691 if (ret) { 5692 mlog_errno(ret); 5693 break; 5694 } 5695 } 5696 5697 
if (ctxt.meta_ac) 5698 ocfs2_free_alloc_context(ctxt.meta_ac); 5699 ocfs2_schedule_truncate_log_flush(osb, 1); 5700 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5701 return ret; 5702 } 5703 5704 /* 5705 * Whenever we modify a xattr value root in the bucket(e.g, CoW 5706 * or change the extent record flag), we need to recalculate 5707 * the metaecc for the whole bucket. So it is done here. 5708 * 5709 * Note: 5710 * We have to give the extra credits for the caller. 5711 */ 5712 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, 5713 handle_t *handle, 5714 void *para) 5715 { 5716 int ret; 5717 struct ocfs2_xattr_bucket *bucket = 5718 (struct ocfs2_xattr_bucket *)para; 5719 5720 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 5721 OCFS2_JOURNAL_ACCESS_WRITE); 5722 if (ret) { 5723 mlog_errno(ret); 5724 return ret; 5725 } 5726 5727 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 5728 5729 return 0; 5730 } 5731 5732 /* 5733 * Special action we need if the xattr value is refcounted. 5734 * 5735 * 1. If the xattr is refcounted, lock the tree. 5736 * 2. CoW the xattr if we are setting the new value and the value 5737 * will be stored outside. 5738 * 3. In other case, decrease_refcount will work for us, so just 5739 * lock the refcount tree, calculate the meta and credits is OK. 5740 * 5741 * We have to do CoW before ocfs2_init_xattr_set_ctxt since 5742 * currently CoW is a completed transaction, while this function 5743 * will also lock the allocators and let us deadlock. So we will 5744 * CoW the whole xattr value. 
5745 */ 5746 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5747 struct ocfs2_dinode *di, 5748 struct ocfs2_xattr_info *xi, 5749 struct ocfs2_xattr_search *xis, 5750 struct ocfs2_xattr_search *xbs, 5751 struct ocfs2_refcount_tree **ref_tree, 5752 int *meta_add, 5753 int *credits) 5754 { 5755 int ret = 0; 5756 struct ocfs2_xattr_block *xb; 5757 struct ocfs2_xattr_entry *xe; 5758 char *base; 5759 u32 p_cluster, num_clusters; 5760 unsigned int ext_flags; 5761 int name_offset, name_len; 5762 struct ocfs2_xattr_value_buf vb; 5763 struct ocfs2_xattr_bucket *bucket = NULL; 5764 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5765 struct ocfs2_post_refcount refcount; 5766 struct ocfs2_post_refcount *p = NULL; 5767 struct buffer_head *ref_root_bh = NULL; 5768 5769 if (!xis->not_found) { 5770 xe = xis->here; 5771 name_offset = le16_to_cpu(xe->xe_name_offset); 5772 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5773 base = xis->base; 5774 vb.vb_bh = xis->inode_bh; 5775 vb.vb_access = ocfs2_journal_access_di; 5776 } else { 5777 int i, block_off = 0; 5778 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5779 xe = xbs->here; 5780 name_offset = le16_to_cpu(xe->xe_name_offset); 5781 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5782 i = xbs->here - xbs->header->xh_entries; 5783 5784 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5785 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5786 bucket_xh(xbs->bucket), 5787 i, &block_off, 5788 &name_offset); 5789 if (ret) { 5790 mlog_errno(ret); 5791 goto out; 5792 } 5793 base = bucket_block(xbs->bucket, block_off); 5794 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5795 vb.vb_access = ocfs2_journal_access; 5796 5797 if (ocfs2_meta_ecc(osb)) { 5798 /*create parameters for ocfs2_post_refcount. 
*/ 5799 bucket = xbs->bucket; 5800 refcount.credits = bucket->bu_blocks; 5801 refcount.para = bucket; 5802 refcount.func = 5803 ocfs2_xattr_bucket_post_refcount; 5804 p = &refcount; 5805 } 5806 } else { 5807 base = xbs->base; 5808 vb.vb_bh = xbs->xattr_bh; 5809 vb.vb_access = ocfs2_journal_access_xb; 5810 } 5811 } 5812 5813 if (ocfs2_xattr_is_local(xe)) 5814 goto out; 5815 5816 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5817 (base + name_offset + name_len); 5818 5819 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5820 &num_clusters, &vb.vb_xv->xr_list, 5821 &ext_flags); 5822 if (ret) { 5823 mlog_errno(ret); 5824 goto out; 5825 } 5826 5827 /* 5828 * We just need to check the 1st extent record, since we always 5829 * CoW the whole xattr. So there shouldn't be a xattr with 5830 * some REFCOUNT extent recs after the 1st one. 5831 */ 5832 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5833 goto out; 5834 5835 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5836 1, ref_tree, &ref_root_bh); 5837 if (ret) { 5838 mlog_errno(ret); 5839 goto out; 5840 } 5841 5842 /* 5843 * If we are deleting the xattr or the new size will be stored inside, 5844 * cool, leave it there, the xattr truncate process will remove them 5845 * for us(it still needs the refcount tree lock and the meta, credits). 5846 * And the worse case is that every cluster truncate will split the 5847 * refcount tree, and make the original extent become 3. So we will need 5848 * 2 * cluster more extent recs at most. 
5849 */ 5850 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5851 5852 ret = ocfs2_refcounted_xattr_delete_need(inode, 5853 &(*ref_tree)->rf_ci, 5854 ref_root_bh, vb.vb_xv, 5855 meta_add, credits); 5856 if (ret) 5857 mlog_errno(ret); 5858 goto out; 5859 } 5860 5861 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5862 *ref_tree, ref_root_bh, 0, 5863 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5864 if (ret) 5865 mlog_errno(ret); 5866 5867 out: 5868 brelse(ref_root_bh); 5869 return ret; 5870 } 5871 5872 /* 5873 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5874 * The physical clusters will be added to refcount tree. 5875 */ 5876 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5877 struct ocfs2_xattr_value_root *xv, 5878 struct ocfs2_extent_tree *value_et, 5879 struct ocfs2_caching_info *ref_ci, 5880 struct buffer_head *ref_root_bh, 5881 struct ocfs2_cached_dealloc_ctxt *dealloc, 5882 struct ocfs2_post_refcount *refcount) 5883 { 5884 int ret = 0; 5885 u32 clusters = le32_to_cpu(xv->xr_clusters); 5886 u32 cpos, p_cluster, num_clusters; 5887 struct ocfs2_extent_list *el = &xv->xr_list; 5888 unsigned int ext_flags; 5889 5890 cpos = 0; 5891 while (cpos < clusters) { 5892 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5893 &num_clusters, el, &ext_flags); 5894 5895 cpos += num_clusters; 5896 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5897 continue; 5898 5899 BUG_ON(!p_cluster); 5900 5901 ret = ocfs2_add_refcount_flag(inode, value_et, 5902 ref_ci, ref_root_bh, 5903 cpos - num_clusters, 5904 p_cluster, num_clusters, 5905 dealloc, refcount); 5906 if (ret) { 5907 mlog_errno(ret); 5908 break; 5909 } 5910 } 5911 5912 return ret; 5913 } 5914 5915 /* 5916 * Given a normal ocfs2_xattr_header, refcount all the entries which 5917 * have value stored outside. 5918 * Used for xattrs stored in inode and ocfs2_xattr_block. 
5919 */ 5920 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5921 struct ocfs2_xattr_value_buf *vb, 5922 struct ocfs2_xattr_header *header, 5923 struct ocfs2_caching_info *ref_ci, 5924 struct buffer_head *ref_root_bh, 5925 struct ocfs2_cached_dealloc_ctxt *dealloc) 5926 { 5927 5928 struct ocfs2_xattr_entry *xe; 5929 struct ocfs2_xattr_value_root *xv; 5930 struct ocfs2_extent_tree et; 5931 int i, ret = 0; 5932 5933 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5934 xe = &header->xh_entries[i]; 5935 5936 if (ocfs2_xattr_is_local(xe)) 5937 continue; 5938 5939 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5940 le16_to_cpu(xe->xe_name_offset) + 5941 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5942 5943 vb->vb_xv = xv; 5944 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5945 5946 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 5947 ref_ci, ref_root_bh, 5948 dealloc, NULL); 5949 if (ret) { 5950 mlog_errno(ret); 5951 break; 5952 } 5953 } 5954 5955 return ret; 5956 } 5957 5958 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 5959 struct buffer_head *fe_bh, 5960 struct ocfs2_caching_info *ref_ci, 5961 struct buffer_head *ref_root_bh, 5962 struct ocfs2_cached_dealloc_ctxt *dealloc) 5963 { 5964 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 5965 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) 5966 (fe_bh->b_data + inode->i_sb->s_blocksize - 5967 le16_to_cpu(di->i_xattr_inline_size)); 5968 struct ocfs2_xattr_value_buf vb = { 5969 .vb_bh = fe_bh, 5970 .vb_access = ocfs2_journal_access_di, 5971 }; 5972 5973 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 5974 ref_ci, ref_root_bh, dealloc); 5975 } 5976 5977 struct ocfs2_xattr_tree_value_refcount_para { 5978 struct ocfs2_caching_info *ref_ci; 5979 struct buffer_head *ref_root_bh; 5980 struct ocfs2_cached_dealloc_ctxt *dealloc; 5981 }; 5982 5983 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 
5984 struct ocfs2_xattr_bucket *bucket, 5985 int offset, 5986 struct ocfs2_xattr_value_root **xv, 5987 struct buffer_head **bh) 5988 { 5989 int ret, block_off, name_offset; 5990 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5991 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 5992 void *base; 5993 5994 ret = ocfs2_xattr_bucket_get_name_value(sb, 5995 bucket_xh(bucket), 5996 offset, 5997 &block_off, 5998 &name_offset); 5999 if (ret) { 6000 mlog_errno(ret); 6001 goto out; 6002 } 6003 6004 base = bucket_block(bucket, block_off); 6005 6006 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6007 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6008 6009 if (bh) 6010 *bh = bucket->bu_bhs[block_off]; 6011 out: 6012 return ret; 6013 } 6014 6015 /* 6016 * For a given xattr bucket, refcount all the entries which 6017 * have value stored outside. 6018 */ 6019 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6020 struct ocfs2_xattr_bucket *bucket, 6021 void *para) 6022 { 6023 int i, ret = 0; 6024 struct ocfs2_extent_tree et; 6025 struct ocfs2_xattr_tree_value_refcount_para *ref = 6026 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6027 struct ocfs2_xattr_header *xh = 6028 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6029 struct ocfs2_xattr_entry *xe; 6030 struct ocfs2_xattr_value_buf vb = { 6031 .vb_access = ocfs2_journal_access, 6032 }; 6033 struct ocfs2_post_refcount refcount = { 6034 .credits = bucket->bu_blocks, 6035 .para = bucket, 6036 .func = ocfs2_xattr_bucket_post_refcount, 6037 }; 6038 struct ocfs2_post_refcount *p = NULL; 6039 6040 /* We only need post_refcount if we support metaecc. 
*/ 6041 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6042 p = &refcount; 6043 6044 mlog(0, "refcount bucket %llu, count = %u\n", 6045 (unsigned long long)bucket_blkno(bucket), 6046 le16_to_cpu(xh->xh_count)); 6047 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6048 xe = &xh->xh_entries[i]; 6049 6050 if (ocfs2_xattr_is_local(xe)) 6051 continue; 6052 6053 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6054 &vb.vb_xv, &vb.vb_bh); 6055 if (ret) { 6056 mlog_errno(ret); 6057 break; 6058 } 6059 6060 ocfs2_init_xattr_value_extent_tree(&et, 6061 INODE_CACHE(inode), &vb); 6062 6063 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6064 &et, ref->ref_ci, 6065 ref->ref_root_bh, 6066 ref->dealloc, p); 6067 if (ret) { 6068 mlog_errno(ret); 6069 break; 6070 } 6071 } 6072 6073 return ret; 6074 6075 } 6076 6077 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6078 struct buffer_head *root_bh, 6079 u64 blkno, u32 cpos, u32 len, void *para) 6080 { 6081 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6082 ocfs2_xattr_bucket_value_refcount, 6083 para); 6084 } 6085 6086 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6087 struct buffer_head *blk_bh, 6088 struct ocfs2_caching_info *ref_ci, 6089 struct buffer_head *ref_root_bh, 6090 struct ocfs2_cached_dealloc_ctxt *dealloc) 6091 { 6092 int ret = 0; 6093 struct ocfs2_xattr_block *xb = 6094 (struct ocfs2_xattr_block *)blk_bh->b_data; 6095 6096 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6097 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6098 struct ocfs2_xattr_value_buf vb = { 6099 .vb_bh = blk_bh, 6100 .vb_access = ocfs2_journal_access_xb, 6101 }; 6102 6103 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6104 ref_ci, ref_root_bh, 6105 dealloc); 6106 } else { 6107 struct ocfs2_xattr_tree_value_refcount_para para = { 6108 .ref_ci = ref_ci, 6109 .ref_root_bh = ref_root_bh, 6110 .dealloc = dealloc, 6111 }; 6112 6113 ret = 
ocfs2_iterate_xattr_index_block(inode, blk_bh, 6114 ocfs2_refcount_xattr_tree_rec, 6115 ¶); 6116 } 6117 6118 return ret; 6119 } 6120 6121 int ocfs2_xattr_attach_refcount_tree(struct inode *inode, 6122 struct buffer_head *fe_bh, 6123 struct ocfs2_caching_info *ref_ci, 6124 struct buffer_head *ref_root_bh, 6125 struct ocfs2_cached_dealloc_ctxt *dealloc) 6126 { 6127 int ret = 0; 6128 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6129 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6130 struct buffer_head *blk_bh = NULL; 6131 6132 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 6133 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, 6134 ref_ci, ref_root_bh, 6135 dealloc); 6136 if (ret) { 6137 mlog_errno(ret); 6138 goto out; 6139 } 6140 } 6141 6142 if (!di->i_xattr_loc) 6143 goto out; 6144 6145 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 6146 &blk_bh); 6147 if (ret < 0) { 6148 mlog_errno(ret); 6149 goto out; 6150 } 6151 6152 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, 6153 ref_root_bh, dealloc); 6154 if (ret) 6155 mlog_errno(ret); 6156 6157 brelse(blk_bh); 6158 out: 6159 6160 return ret; 6161 } 6162 6163 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); 6164 /* 6165 * Store the information we need in xattr reflink. 6166 * old_bh and new_bh are inode bh for the old and new inode. 6167 */ 6168 struct ocfs2_xattr_reflink { 6169 struct inode *old_inode; 6170 struct inode *new_inode; 6171 struct buffer_head *old_bh; 6172 struct buffer_head *new_bh; 6173 struct ocfs2_caching_info *ref_ci; 6174 struct buffer_head *ref_root_bh; 6175 struct ocfs2_cached_dealloc_ctxt *dealloc; 6176 should_xattr_reflinked *xattr_reflinked; 6177 }; 6178 6179 /* 6180 * Given a xattr header and xe offset, 6181 * return the proper xv and the corresponding bh. 6182 * xattr in inode, block and xattr tree have different implementaions. 
6183 */ 6184 typedef int (get_xattr_value_root)(struct super_block *sb, 6185 struct buffer_head *bh, 6186 struct ocfs2_xattr_header *xh, 6187 int offset, 6188 struct ocfs2_xattr_value_root **xv, 6189 struct buffer_head **ret_bh, 6190 void *para); 6191 6192 /* 6193 * Calculate all the xattr value root metadata stored in this xattr header and 6194 * credits we need if we create them from the scratch. 6195 * We use get_xattr_value_root so that all types of xattr container can use it. 6196 */ 6197 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, 6198 struct buffer_head *bh, 6199 struct ocfs2_xattr_header *xh, 6200 int *metas, int *credits, 6201 int *num_recs, 6202 get_xattr_value_root *func, 6203 void *para) 6204 { 6205 int i, ret = 0; 6206 struct ocfs2_xattr_value_root *xv; 6207 struct ocfs2_xattr_entry *xe; 6208 6209 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6210 xe = &xh->xh_entries[i]; 6211 if (ocfs2_xattr_is_local(xe)) 6212 continue; 6213 6214 ret = func(sb, bh, xh, i, &xv, NULL, para); 6215 if (ret) { 6216 mlog_errno(ret); 6217 break; 6218 } 6219 6220 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * 6221 le16_to_cpu(xv->xr_list.l_next_free_rec); 6222 6223 *credits += ocfs2_calc_extend_credits(sb, 6224 &def_xv.xv.xr_list, 6225 le32_to_cpu(xv->xr_clusters)); 6226 6227 /* 6228 * If the value is a tree with depth > 1, We don't go deep 6229 * to the extent block, so just calculate a maximum record num. 6230 */ 6231 if (!xv->xr_list.l_tree_depth) 6232 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); 6233 else 6234 *num_recs += ocfs2_clusters_for_bytes(sb, 6235 XATTR_SIZE_MAX); 6236 } 6237 6238 return ret; 6239 } 6240 6241 /* Used by xattr inode and block to return the right xv and buffer_head. 
*/ 6242 static int ocfs2_get_xattr_value_root(struct super_block *sb, 6243 struct buffer_head *bh, 6244 struct ocfs2_xattr_header *xh, 6245 int offset, 6246 struct ocfs2_xattr_value_root **xv, 6247 struct buffer_head **ret_bh, 6248 void *para) 6249 { 6250 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6251 6252 *xv = (struct ocfs2_xattr_value_root *)((void *)xh + 6253 le16_to_cpu(xe->xe_name_offset) + 6254 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6255 6256 if (ret_bh) 6257 *ret_bh = bh; 6258 6259 return 0; 6260 } 6261 6262 /* 6263 * Lock the meta_ac and caculate how much credits we need for reflink xattrs. 6264 * It is only used for inline xattr and xattr block. 6265 */ 6266 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, 6267 struct ocfs2_xattr_header *xh, 6268 struct buffer_head *ref_root_bh, 6269 int *credits, 6270 struct ocfs2_alloc_context **meta_ac) 6271 { 6272 int ret, meta_add = 0, num_recs = 0; 6273 struct ocfs2_refcount_block *rb = 6274 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 6275 6276 *credits = 0; 6277 6278 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh, 6279 &meta_add, credits, &num_recs, 6280 ocfs2_get_xattr_value_root, 6281 NULL); 6282 if (ret) { 6283 mlog_errno(ret); 6284 goto out; 6285 } 6286 6287 /* 6288 * We need to add/modify num_recs in refcount tree, so just calculate 6289 * an approximate number we need for refcount tree change. 6290 * Sometimes we need to split the tree, and after split, half recs 6291 * will be moved to the new block, and a new block can only provide 6292 * half number of recs. So we multiple new blocks by 2. 
6293 */ 6294 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6295 meta_add += num_recs; 6296 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6297 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6298 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6299 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6300 else 6301 *credits += 1; 6302 6303 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); 6304 if (ret) 6305 mlog_errno(ret); 6306 6307 out: 6308 return ret; 6309 } 6310 6311 /* 6312 * Given a xattr header, reflink all the xattrs in this container. 6313 * It can be used for inode, block and bucket. 6314 * 6315 * NOTE: 6316 * Before we call this function, the caller has memcpy the xattr in 6317 * old_xh to the new_xh. 6318 * 6319 * If args.xattr_reflinked is set, call it to decide whether the xe should 6320 * be reflinked or not. If not, remove it from the new xattr header. 6321 */ 6322 static int ocfs2_reflink_xattr_header(handle_t *handle, 6323 struct ocfs2_xattr_reflink *args, 6324 struct buffer_head *old_bh, 6325 struct ocfs2_xattr_header *xh, 6326 struct buffer_head *new_bh, 6327 struct ocfs2_xattr_header *new_xh, 6328 struct ocfs2_xattr_value_buf *vb, 6329 struct ocfs2_alloc_context *meta_ac, 6330 get_xattr_value_root *func, 6331 void *para) 6332 { 6333 int ret = 0, i, j; 6334 struct super_block *sb = args->old_inode->i_sb; 6335 struct buffer_head *value_bh; 6336 struct ocfs2_xattr_entry *xe, *last; 6337 struct ocfs2_xattr_value_root *xv, *new_xv; 6338 struct ocfs2_extent_tree data_et; 6339 u32 clusters, cpos, p_cluster, num_clusters; 6340 unsigned int ext_flags = 0; 6341 6342 mlog(0, "reflink xattr in container %llu, count = %u\n", 6343 (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count)); 6344 6345 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; 6346 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6347 xe = &xh->xh_entries[i]; 6348 6349 if (args->xattr_reflinked && 
!args->xattr_reflinked(xe)) { 6350 xe = &new_xh->xh_entries[j]; 6351 6352 le16_add_cpu(&new_xh->xh_count, -1); 6353 if (new_xh->xh_count) { 6354 memmove(xe, xe + 1, 6355 (void *)last - (void *)xe); 6356 memset(last, 0, 6357 sizeof(struct ocfs2_xattr_entry)); 6358 } 6359 6360 /* 6361 * We don't want j to increase in the next round since 6362 * it is already moved ahead. 6363 */ 6364 j--; 6365 continue; 6366 } 6367 6368 if (ocfs2_xattr_is_local(xe)) 6369 continue; 6370 6371 ret = func(sb, old_bh, xh, i, &xv, NULL, para); 6372 if (ret) { 6373 mlog_errno(ret); 6374 break; 6375 } 6376 6377 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); 6378 if (ret) { 6379 mlog_errno(ret); 6380 break; 6381 } 6382 6383 /* 6384 * For the xattr which has l_tree_depth = 0, all the extent 6385 * recs have already be copied to the new xh with the 6386 * propriate OCFS2_EXT_REFCOUNTED flag we just need to 6387 * increase the refount count int the refcount tree. 6388 * 6389 * For the xattr which has l_tree_depth > 0, we need 6390 * to initialize it to the empty default value root, 6391 * and then insert the extents one by one. 
6392 */ 6393 if (xv->xr_list.l_tree_depth) { 6394 memcpy(new_xv, &def_xv, sizeof(def_xv)); 6395 vb->vb_xv = new_xv; 6396 vb->vb_bh = value_bh; 6397 ocfs2_init_xattr_value_extent_tree(&data_et, 6398 INODE_CACHE(args->new_inode), vb); 6399 } 6400 6401 clusters = le32_to_cpu(xv->xr_clusters); 6402 cpos = 0; 6403 while (cpos < clusters) { 6404 ret = ocfs2_xattr_get_clusters(args->old_inode, 6405 cpos, 6406 &p_cluster, 6407 &num_clusters, 6408 &xv->xr_list, 6409 &ext_flags); 6410 if (ret) { 6411 mlog_errno(ret); 6412 goto out; 6413 } 6414 6415 BUG_ON(!p_cluster); 6416 6417 if (xv->xr_list.l_tree_depth) { 6418 ret = ocfs2_insert_extent(handle, 6419 &data_et, cpos, 6420 ocfs2_clusters_to_blocks( 6421 args->old_inode->i_sb, 6422 p_cluster), 6423 num_clusters, ext_flags, 6424 meta_ac); 6425 if (ret) { 6426 mlog_errno(ret); 6427 goto out; 6428 } 6429 } 6430 6431 ret = ocfs2_increase_refcount(handle, args->ref_ci, 6432 args->ref_root_bh, 6433 p_cluster, num_clusters, 6434 meta_ac, args->dealloc); 6435 if (ret) { 6436 mlog_errno(ret); 6437 goto out; 6438 } 6439 6440 cpos += num_clusters; 6441 } 6442 } 6443 6444 out: 6445 return ret; 6446 } 6447 6448 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) 6449 { 6450 int ret = 0, credits = 0; 6451 handle_t *handle; 6452 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); 6453 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; 6454 int inline_size = le16_to_cpu(di->i_xattr_inline_size); 6455 int header_off = osb->sb->s_blocksize - inline_size; 6456 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *) 6457 (args->old_bh->b_data + header_off); 6458 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *) 6459 (args->new_bh->b_data + header_off); 6460 struct ocfs2_alloc_context *meta_ac = NULL; 6461 struct ocfs2_inode_info *new_oi; 6462 struct ocfs2_dinode *new_di; 6463 struct ocfs2_xattr_value_buf vb = { 6464 .vb_bh = args->new_bh, 6465 .vb_access = 
ocfs2_journal_access_di, 6466 }; 6467 6468 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6469 &credits, &meta_ac); 6470 if (ret) { 6471 mlog_errno(ret); 6472 goto out; 6473 } 6474 6475 handle = ocfs2_start_trans(osb, credits); 6476 if (IS_ERR(handle)) { 6477 ret = PTR_ERR(handle); 6478 mlog_errno(ret); 6479 goto out; 6480 } 6481 6482 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), 6483 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6484 if (ret) { 6485 mlog_errno(ret); 6486 goto out_commit; 6487 } 6488 6489 memcpy(args->new_bh->b_data + header_off, 6490 args->old_bh->b_data + header_off, inline_size); 6491 6492 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6493 new_di->i_xattr_inline_size = cpu_to_le16(inline_size); 6494 6495 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, 6496 args->new_bh, new_xh, &vb, meta_ac, 6497 ocfs2_get_xattr_value_root, NULL); 6498 if (ret) { 6499 mlog_errno(ret); 6500 goto out_commit; 6501 } 6502 6503 new_oi = OCFS2_I(args->new_inode); 6504 spin_lock(&new_oi->ip_lock); 6505 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; 6506 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6507 spin_unlock(&new_oi->ip_lock); 6508 6509 ocfs2_journal_dirty(handle, args->new_bh); 6510 6511 out_commit: 6512 ocfs2_commit_trans(osb, handle); 6513 6514 out: 6515 if (meta_ac) 6516 ocfs2_free_alloc_context(meta_ac); 6517 return ret; 6518 } 6519 6520 static int ocfs2_create_empty_xattr_block(struct inode *inode, 6521 struct buffer_head *fe_bh, 6522 struct buffer_head **ret_bh, 6523 int indexed) 6524 { 6525 int ret; 6526 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6527 struct ocfs2_xattr_set_ctxt ctxt; 6528 6529 memset(&ctxt, 0, sizeof(ctxt)); 6530 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); 6531 if (ret < 0) { 6532 mlog_errno(ret); 6533 return ret; 6534 } 6535 6536 ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); 
6537 if (IS_ERR(ctxt.handle)) { 6538 ret = PTR_ERR(ctxt.handle); 6539 mlog_errno(ret); 6540 goto out; 6541 } 6542 6543 mlog(0, "create new xattr block for inode %llu, index = %d\n", 6544 (unsigned long long)fe_bh->b_blocknr, indexed); 6545 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6546 ret_bh); 6547 if (ret) 6548 mlog_errno(ret); 6549 6550 ocfs2_commit_trans(osb, ctxt.handle); 6551 out: 6552 ocfs2_free_alloc_context(ctxt.meta_ac); 6553 return ret; 6554 } 6555 6556 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args, 6557 struct buffer_head *blk_bh, 6558 struct buffer_head *new_blk_bh) 6559 { 6560 int ret = 0, credits = 0; 6561 handle_t *handle; 6562 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode); 6563 struct ocfs2_dinode *new_di; 6564 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb); 6565 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 6566 struct ocfs2_xattr_block *xb = 6567 (struct ocfs2_xattr_block *)blk_bh->b_data; 6568 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header; 6569 struct ocfs2_xattr_block *new_xb = 6570 (struct ocfs2_xattr_block *)new_blk_bh->b_data; 6571 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header; 6572 struct ocfs2_alloc_context *meta_ac; 6573 struct ocfs2_xattr_value_buf vb = { 6574 .vb_bh = new_blk_bh, 6575 .vb_access = ocfs2_journal_access_xb, 6576 }; 6577 6578 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6579 &credits, &meta_ac); 6580 if (ret) { 6581 mlog_errno(ret); 6582 return ret; 6583 } 6584 6585 /* One more credits in case we need to add xattr flags in new inode. 
*/ 6586 handle = ocfs2_start_trans(osb, credits + 1); 6587 if (IS_ERR(handle)) { 6588 ret = PTR_ERR(handle); 6589 mlog_errno(ret); 6590 goto out; 6591 } 6592 6593 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6594 ret = ocfs2_journal_access_di(handle, 6595 INODE_CACHE(args->new_inode), 6596 args->new_bh, 6597 OCFS2_JOURNAL_ACCESS_WRITE); 6598 if (ret) { 6599 mlog_errno(ret); 6600 goto out_commit; 6601 } 6602 } 6603 6604 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode), 6605 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6606 if (ret) { 6607 mlog_errno(ret); 6608 goto out_commit; 6609 } 6610 6611 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off, 6612 osb->sb->s_blocksize - header_off); 6613 6614 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh, 6615 new_blk_bh, new_xh, &vb, meta_ac, 6616 ocfs2_get_xattr_value_root, NULL); 6617 if (ret) { 6618 mlog_errno(ret); 6619 goto out_commit; 6620 } 6621 6622 ocfs2_journal_dirty(handle, new_blk_bh); 6623 6624 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6625 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6626 spin_lock(&new_oi->ip_lock); 6627 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 6628 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6629 spin_unlock(&new_oi->ip_lock); 6630 6631 ocfs2_journal_dirty(handle, args->new_bh); 6632 } 6633 6634 out_commit: 6635 ocfs2_commit_trans(osb, handle); 6636 6637 out: 6638 ocfs2_free_alloc_context(meta_ac); 6639 return ret; 6640 } 6641 6642 struct ocfs2_reflink_xattr_tree_args { 6643 struct ocfs2_xattr_reflink *reflink; 6644 struct buffer_head *old_blk_bh; 6645 struct buffer_head *new_blk_bh; 6646 struct ocfs2_xattr_bucket *old_bucket; 6647 struct ocfs2_xattr_bucket *new_bucket; 6648 }; 6649 6650 /* 6651 * NOTE: 6652 * We have to handle the case that both old bucket and new bucket 6653 * will call this function to get the right ret_bh. 6654 * So The caller must give us the right bh. 
6655 */ 6656 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb, 6657 struct buffer_head *bh, 6658 struct ocfs2_xattr_header *xh, 6659 int offset, 6660 struct ocfs2_xattr_value_root **xv, 6661 struct buffer_head **ret_bh, 6662 void *para) 6663 { 6664 struct ocfs2_reflink_xattr_tree_args *args = 6665 (struct ocfs2_reflink_xattr_tree_args *)para; 6666 struct ocfs2_xattr_bucket *bucket; 6667 6668 if (bh == args->old_bucket->bu_bhs[0]) 6669 bucket = args->old_bucket; 6670 else 6671 bucket = args->new_bucket; 6672 6673 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6674 xv, ret_bh); 6675 } 6676 6677 struct ocfs2_value_tree_metas { 6678 int num_metas; 6679 int credits; 6680 int num_recs; 6681 }; 6682 6683 static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb, 6684 struct buffer_head *bh, 6685 struct ocfs2_xattr_header *xh, 6686 int offset, 6687 struct ocfs2_xattr_value_root **xv, 6688 struct buffer_head **ret_bh, 6689 void *para) 6690 { 6691 struct ocfs2_xattr_bucket *bucket = 6692 (struct ocfs2_xattr_bucket *)para; 6693 6694 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6695 xv, ret_bh); 6696 } 6697 6698 static int ocfs2_calc_value_tree_metas(struct inode *inode, 6699 struct ocfs2_xattr_bucket *bucket, 6700 void *para) 6701 { 6702 struct ocfs2_value_tree_metas *metas = 6703 (struct ocfs2_value_tree_metas *)para; 6704 struct ocfs2_xattr_header *xh = 6705 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6706 6707 /* Add the credits for this bucket first. 
*/ 6708 metas->credits += bucket->bu_blocks; 6709 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0], 6710 xh, &metas->num_metas, 6711 &metas->credits, &metas->num_recs, 6712 ocfs2_value_tree_metas_in_bucket, 6713 bucket); 6714 } 6715 6716 /* 6717 * Given a xattr extent rec starting from blkno and having len clusters, 6718 * iterate all the buckets calculate how much metadata we need for reflinking 6719 * all the ocfs2_xattr_value_root and lock the allocators accordingly. 6720 */ 6721 static int ocfs2_lock_reflink_xattr_rec_allocators( 6722 struct ocfs2_reflink_xattr_tree_args *args, 6723 struct ocfs2_extent_tree *xt_et, 6724 u64 blkno, u32 len, int *credits, 6725 struct ocfs2_alloc_context **meta_ac, 6726 struct ocfs2_alloc_context **data_ac) 6727 { 6728 int ret, num_free_extents; 6729 struct ocfs2_value_tree_metas metas; 6730 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb); 6731 struct ocfs2_refcount_block *rb; 6732 6733 memset(&metas, 0, sizeof(metas)); 6734 6735 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len, 6736 ocfs2_calc_value_tree_metas, &metas); 6737 if (ret) { 6738 mlog_errno(ret); 6739 goto out; 6740 } 6741 6742 *credits = metas.credits; 6743 6744 /* 6745 * Calculate we need for refcount tree change. 6746 * 6747 * We need to add/modify num_recs in refcount tree, so just calculate 6748 * an approximate number we need for refcount tree change. 6749 * Sometimes we need to split the tree, and after split, half recs 6750 * will be moved to the new block, and a new block can only provide 6751 * half number of recs. So we multiple new blocks by 2. 6752 * In the end, we have to add credits for modifying the already 6753 * existed refcount block. 
6754 */ 6755 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data; 6756 metas.num_recs = 6757 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) / 6758 ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6759 metas.num_metas += metas.num_recs; 6760 *credits += metas.num_recs + 6761 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6762 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6763 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6764 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6765 else 6766 *credits += 1; 6767 6768 /* count in the xattr tree change. */ 6769 num_free_extents = ocfs2_num_free_extents(osb, xt_et); 6770 if (num_free_extents < 0) { 6771 ret = num_free_extents; 6772 mlog_errno(ret); 6773 goto out; 6774 } 6775 6776 if (num_free_extents < len) 6777 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); 6778 6779 *credits += ocfs2_calc_extend_credits(osb->sb, 6780 xt_et->et_root_el, len); 6781 6782 if (metas.num_metas) { 6783 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, 6784 meta_ac); 6785 if (ret) { 6786 mlog_errno(ret); 6787 goto out; 6788 } 6789 } 6790 6791 if (len) { 6792 ret = ocfs2_reserve_clusters(osb, len, data_ac); 6793 if (ret) 6794 mlog_errno(ret); 6795 } 6796 out: 6797 if (ret) { 6798 if (*meta_ac) { 6799 ocfs2_free_alloc_context(*meta_ac); 6800 meta_ac = NULL; 6801 } 6802 } 6803 6804 return ret; 6805 } 6806 6807 static int ocfs2_reflink_xattr_bucket(handle_t *handle, 6808 u64 blkno, u64 new_blkno, u32 clusters, 6809 u32 *cpos, int num_buckets, 6810 struct ocfs2_alloc_context *meta_ac, 6811 struct ocfs2_alloc_context *data_ac, 6812 struct ocfs2_reflink_xattr_tree_args *args) 6813 { 6814 int i, j, ret = 0; 6815 struct super_block *sb = args->reflink->old_inode->i_sb; 6816 int bpb = args->old_bucket->bu_blocks; 6817 struct ocfs2_xattr_value_buf vb = { 6818 .vb_access = ocfs2_journal_access, 6819 }; 6820 6821 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) { 6822 ret 
= ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6823 if (ret) { 6824 mlog_errno(ret); 6825 break; 6826 } 6827 6828 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno); 6829 if (ret) { 6830 mlog_errno(ret); 6831 break; 6832 } 6833 6834 ret = ocfs2_xattr_bucket_journal_access(handle, 6835 args->new_bucket, 6836 OCFS2_JOURNAL_ACCESS_CREATE); 6837 if (ret) { 6838 mlog_errno(ret); 6839 break; 6840 } 6841 6842 for (j = 0; j < bpb; j++) 6843 memcpy(bucket_block(args->new_bucket, j), 6844 bucket_block(args->old_bucket, j), 6845 sb->s_blocksize); 6846 6847 /* 6848 * Record the start cpos so that we can use it to initialize 6849 * our xattr tree we also set the xh_num_bucket for the new 6850 * bucket. 6851 */ 6852 if (i == 0) { 6853 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> 6854 xh_entries[0].xe_name_hash); 6855 bucket_xh(args->new_bucket)->xh_num_buckets = 6856 cpu_to_le16(num_buckets); 6857 } 6858 6859 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6860 6861 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6862 args->old_bucket->bu_bhs[0], 6863 bucket_xh(args->old_bucket), 6864 args->new_bucket->bu_bhs[0], 6865 bucket_xh(args->new_bucket), 6866 &vb, meta_ac, 6867 ocfs2_get_reflink_xattr_value_root, 6868 args); 6869 if (ret) { 6870 mlog_errno(ret); 6871 break; 6872 } 6873 6874 /* 6875 * Re-access and dirty the bucket to calculate metaecc. 6876 * Because we may extend the transaction in reflink_xattr_header 6877 * which will let the already accessed block gone. 
6878 */ 6879 ret = ocfs2_xattr_bucket_journal_access(handle, 6880 args->new_bucket, 6881 OCFS2_JOURNAL_ACCESS_WRITE); 6882 if (ret) { 6883 mlog_errno(ret); 6884 break; 6885 } 6886 6887 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6888 6889 ocfs2_xattr_bucket_relse(args->old_bucket); 6890 ocfs2_xattr_bucket_relse(args->new_bucket); 6891 } 6892 6893 ocfs2_xattr_bucket_relse(args->old_bucket); 6894 ocfs2_xattr_bucket_relse(args->new_bucket); 6895 return ret; 6896 } 6897 6898 static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6899 struct inode *inode, 6900 struct ocfs2_reflink_xattr_tree_args *args, 6901 struct ocfs2_extent_tree *et, 6902 struct ocfs2_alloc_context *meta_ac, 6903 struct ocfs2_alloc_context *data_ac, 6904 u64 blkno, u32 cpos, u32 len) 6905 { 6906 int ret, first_inserted = 0; 6907 u32 p_cluster, num_clusters, reflink_cpos = 0; 6908 u64 new_blkno; 6909 unsigned int num_buckets, reflink_buckets; 6910 unsigned int bpc = 6911 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 6912 6913 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6914 if (ret) { 6915 mlog_errno(ret); 6916 goto out; 6917 } 6918 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); 6919 ocfs2_xattr_bucket_relse(args->old_bucket); 6920 6921 while (len && num_buckets) { 6922 ret = ocfs2_claim_clusters(handle, data_ac, 6923 1, &p_cluster, &num_clusters); 6924 if (ret) { 6925 mlog_errno(ret); 6926 goto out; 6927 } 6928 6929 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 6930 reflink_buckets = min(num_buckets, bpc * num_clusters); 6931 6932 ret = ocfs2_reflink_xattr_bucket(handle, blkno, 6933 new_blkno, num_clusters, 6934 &reflink_cpos, reflink_buckets, 6935 meta_ac, data_ac, args); 6936 if (ret) { 6937 mlog_errno(ret); 6938 goto out; 6939 } 6940 6941 /* 6942 * For the 1st allocated cluster, we make it use the same cpos 6943 * so that the xattr tree looks the same as the original one 6944 * in the most case. 
6945 */ 6946 if (!first_inserted) { 6947 reflink_cpos = cpos; 6948 first_inserted = 1; 6949 } 6950 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 6951 num_clusters, 0, meta_ac); 6952 if (ret) 6953 mlog_errno(ret); 6954 6955 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n", 6956 (unsigned long long)new_blkno, num_clusters, reflink_cpos); 6957 6958 len -= num_clusters; 6959 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6960 num_buckets -= reflink_buckets; 6961 } 6962 out: 6963 return ret; 6964 } 6965 6966 /* 6967 * Create the same xattr extent record in the new inode's xattr tree. 6968 */ 6969 static int ocfs2_reflink_xattr_rec(struct inode *inode, 6970 struct buffer_head *root_bh, 6971 u64 blkno, 6972 u32 cpos, 6973 u32 len, 6974 void *para) 6975 { 6976 int ret, credits = 0; 6977 handle_t *handle; 6978 struct ocfs2_reflink_xattr_tree_args *args = 6979 (struct ocfs2_reflink_xattr_tree_args *)para; 6980 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6981 struct ocfs2_alloc_context *meta_ac = NULL; 6982 struct ocfs2_alloc_context *data_ac = NULL; 6983 struct ocfs2_extent_tree et; 6984 6985 mlog(0, "reflink xattr buckets %llu len %u\n", 6986 (unsigned long long)blkno, len); 6987 6988 ocfs2_init_xattr_tree_extent_tree(&et, 6989 INODE_CACHE(args->reflink->new_inode), 6990 args->new_blk_bh); 6991 6992 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 6993 len, &credits, 6994 &meta_ac, &data_ac); 6995 if (ret) { 6996 mlog_errno(ret); 6997 goto out; 6998 } 6999 7000 handle = ocfs2_start_trans(osb, credits); 7001 if (IS_ERR(handle)) { 7002 ret = PTR_ERR(handle); 7003 mlog_errno(ret); 7004 goto out; 7005 } 7006 7007 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7008 meta_ac, data_ac, 7009 blkno, cpos, len); 7010 if (ret) 7011 mlog_errno(ret); 7012 7013 ocfs2_commit_trans(osb, handle); 7014 7015 out: 7016 if (meta_ac) 7017 ocfs2_free_alloc_context(meta_ac); 7018 if (data_ac) 7019 
ocfs2_free_alloc_context(data_ac); 7020 return ret; 7021 } 7022 7023 /* 7024 * Create reflinked xattr buckets. 7025 * We will add bucket one by one, and refcount all the xattrs in the bucket 7026 * if they are stored outside. 7027 */ 7028 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7029 struct buffer_head *blk_bh, 7030 struct buffer_head *new_blk_bh) 7031 { 7032 int ret; 7033 struct ocfs2_reflink_xattr_tree_args para; 7034 7035 memset(¶, 0, sizeof(para)); 7036 para.reflink = args; 7037 para.old_blk_bh = blk_bh; 7038 para.new_blk_bh = new_blk_bh; 7039 7040 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7041 if (!para.old_bucket) { 7042 mlog_errno(-ENOMEM); 7043 return -ENOMEM; 7044 } 7045 7046 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7047 if (!para.new_bucket) { 7048 ret = -ENOMEM; 7049 mlog_errno(ret); 7050 goto out; 7051 } 7052 7053 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7054 ocfs2_reflink_xattr_rec, 7055 ¶); 7056 if (ret) 7057 mlog_errno(ret); 7058 7059 out: 7060 ocfs2_xattr_bucket_free(para.old_bucket); 7061 ocfs2_xattr_bucket_free(para.new_bucket); 7062 return ret; 7063 } 7064 7065 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7066 struct buffer_head *blk_bh) 7067 { 7068 int ret, indexed = 0; 7069 struct buffer_head *new_blk_bh = NULL; 7070 struct ocfs2_xattr_block *xb = 7071 (struct ocfs2_xattr_block *)blk_bh->b_data; 7072 7073 7074 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7075 indexed = 1; 7076 7077 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7078 &new_blk_bh, indexed); 7079 if (ret) { 7080 mlog_errno(ret); 7081 goto out; 7082 } 7083 7084 if (!indexed) 7085 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7086 else 7087 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7088 if (ret) 7089 mlog_errno(ret); 7090 7091 out: 7092 brelse(new_blk_bh); 7093 return ret; 7094 } 7095 7096 static int 
ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7097 { 7098 int type = ocfs2_xattr_get_type(xe); 7099 7100 return type != OCFS2_XATTR_INDEX_SECURITY && 7101 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && 7102 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; 7103 } 7104 7105 int ocfs2_reflink_xattrs(struct inode *old_inode, 7106 struct buffer_head *old_bh, 7107 struct inode *new_inode, 7108 struct buffer_head *new_bh, 7109 bool preserve_security) 7110 { 7111 int ret; 7112 struct ocfs2_xattr_reflink args; 7113 struct ocfs2_inode_info *oi = OCFS2_I(old_inode); 7114 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; 7115 struct buffer_head *blk_bh = NULL; 7116 struct ocfs2_cached_dealloc_ctxt dealloc; 7117 struct ocfs2_refcount_tree *ref_tree; 7118 struct buffer_head *ref_root_bh = NULL; 7119 7120 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7121 le64_to_cpu(di->i_refcount_loc), 7122 1, &ref_tree, &ref_root_bh); 7123 if (ret) { 7124 mlog_errno(ret); 7125 goto out; 7126 } 7127 7128 ocfs2_init_dealloc_ctxt(&dealloc); 7129 7130 args.old_inode = old_inode; 7131 args.new_inode = new_inode; 7132 args.old_bh = old_bh; 7133 args.new_bh = new_bh; 7134 args.ref_ci = &ref_tree->rf_ci; 7135 args.ref_root_bh = ref_root_bh; 7136 args.dealloc = &dealloc; 7137 if (preserve_security) 7138 args.xattr_reflinked = NULL; 7139 else 7140 args.xattr_reflinked = ocfs2_reflink_xattr_no_security; 7141 7142 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 7143 ret = ocfs2_reflink_xattr_inline(&args); 7144 if (ret) { 7145 mlog_errno(ret); 7146 goto out_unlock; 7147 } 7148 } 7149 7150 if (!di->i_xattr_loc) 7151 goto out_unlock; 7152 7153 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), 7154 &blk_bh); 7155 if (ret < 0) { 7156 mlog_errno(ret); 7157 goto out_unlock; 7158 } 7159 7160 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); 7161 if (ret) 7162 mlog_errno(ret); 7163 7164 brelse(blk_bh); 7165 7166 out_unlock: 7167 
ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7168 ref_tree, 1); 7169 brelse(ref_root_bh); 7170 7171 if (ocfs2_dealloc_has_cluster(&dealloc)) { 7172 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); 7173 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); 7174 } 7175 7176 out: 7177 return ret; 7178 } 7179 7180 /* 7181 * Initialize security and acl for a already created inode. 7182 * Used for reflink a non-preserve-security file. 7183 * 7184 * It uses common api like ocfs2_xattr_set, so the caller 7185 * must not hold any lock expect i_mutex. 7186 */ 7187 int ocfs2_init_security_and_acl(struct inode *dir, 7188 struct inode *inode) 7189 { 7190 int ret = 0; 7191 struct buffer_head *dir_bh = NULL; 7192 struct ocfs2_security_xattr_info si = { 7193 .enable = 1, 7194 }; 7195 7196 ret = ocfs2_init_security_get(inode, dir, &si); 7197 if (!ret) { 7198 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7199 si.name, si.value, si.value_len, 7200 XATTR_CREATE); 7201 if (ret) { 7202 mlog_errno(ret); 7203 goto leave; 7204 } 7205 } else if (ret != -EOPNOTSUPP) { 7206 mlog_errno(ret); 7207 goto leave; 7208 } 7209 7210 ret = ocfs2_inode_lock(dir, &dir_bh, 0); 7211 if (ret) { 7212 mlog_errno(ret); 7213 goto leave; 7214 } 7215 7216 ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); 7217 if (ret) 7218 mlog_errno(ret); 7219 7220 ocfs2_inode_unlock(dir, 0); 7221 brelse(dir_bh); 7222 leave: 7223 return ret; 7224 } 7225 /* 7226 * 'security' attributes support 7227 */ 7228 static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list, 7229 size_t list_size, const char *name, 7230 size_t name_len, int type) 7231 { 7232 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 7233 const size_t total_len = prefix_len + name_len + 1; 7234 7235 if (list && total_len <= list_size) { 7236 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); 7237 memcpy(list + prefix_len, name, name_len); 7238 list[prefix_len + name_len] = '\0'; 7239 } 7240 
return total_len; 7241 } 7242 7243 static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name, 7244 void *buffer, size_t size, int type) 7245 { 7246 if (strcmp(name, "") == 0) 7247 return -EINVAL; 7248 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY, 7249 name, buffer, size); 7250 } 7251 7252 static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name, 7253 const void *value, size_t size, int flags, int type) 7254 { 7255 if (strcmp(name, "") == 0) 7256 return -EINVAL; 7257 7258 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY, 7259 name, value, size, flags); 7260 } 7261 7262 int ocfs2_init_security_get(struct inode *inode, 7263 struct inode *dir, 7264 struct ocfs2_security_xattr_info *si) 7265 { 7266 /* check whether ocfs2 support feature xattr */ 7267 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7268 return -EOPNOTSUPP; 7269 return security_inode_init_security(inode, dir, &si->name, &si->value, 7270 &si->value_len); 7271 } 7272 7273 int ocfs2_init_security_set(handle_t *handle, 7274 struct inode *inode, 7275 struct buffer_head *di_bh, 7276 struct ocfs2_security_xattr_info *si, 7277 struct ocfs2_alloc_context *xattr_ac, 7278 struct ocfs2_alloc_context *data_ac) 7279 { 7280 return ocfs2_xattr_set_handle(handle, inode, di_bh, 7281 OCFS2_XATTR_INDEX_SECURITY, 7282 si->name, si->value, si->value_len, 0, 7283 xattr_ac, data_ac); 7284 } 7285 7286 const struct xattr_handler ocfs2_xattr_security_handler = { 7287 .prefix = XATTR_SECURITY_PREFIX, 7288 .list = ocfs2_xattr_security_list, 7289 .get = ocfs2_xattr_security_get, 7290 .set = ocfs2_xattr_security_set, 7291 }; 7292 7293 /* 7294 * 'trusted' attributes support 7295 */ 7296 static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list, 7297 size_t list_size, const char *name, 7298 size_t name_len, int type) 7299 { 7300 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 7301 const size_t total_len = prefix_len + name_len + 1; 7302 7303 
if (list && total_len <= list_size) { 7304 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); 7305 memcpy(list + prefix_len, name, name_len); 7306 list[prefix_len + name_len] = '\0'; 7307 } 7308 return total_len; 7309 } 7310 7311 static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name, 7312 void *buffer, size_t size, int type) 7313 { 7314 if (strcmp(name, "") == 0) 7315 return -EINVAL; 7316 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED, 7317 name, buffer, size); 7318 } 7319 7320 static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name, 7321 const void *value, size_t size, int flags, int type) 7322 { 7323 if (strcmp(name, "") == 0) 7324 return -EINVAL; 7325 7326 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED, 7327 name, value, size, flags); 7328 } 7329 7330 const struct xattr_handler ocfs2_xattr_trusted_handler = { 7331 .prefix = XATTR_TRUSTED_PREFIX, 7332 .list = ocfs2_xattr_trusted_list, 7333 .get = ocfs2_xattr_trusted_get, 7334 .set = ocfs2_xattr_trusted_set, 7335 }; 7336 7337 /* 7338 * 'user' attributes support 7339 */ 7340 static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list, 7341 size_t list_size, const char *name, 7342 size_t name_len, int type) 7343 { 7344 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 7345 const size_t total_len = prefix_len + name_len + 1; 7346 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7347 7348 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7349 return 0; 7350 7351 if (list && total_len <= list_size) { 7352 memcpy(list, XATTR_USER_PREFIX, prefix_len); 7353 memcpy(list + prefix_len, name, name_len); 7354 list[prefix_len + name_len] = '\0'; 7355 } 7356 return total_len; 7357 } 7358 7359 static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name, 7360 void *buffer, size_t size, int type) 7361 { 7362 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7363 7364 if (strcmp(name, "") == 0) 7365 return -EINVAL; 7366 if 
(osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7367 return -EOPNOTSUPP; 7368 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name, 7369 buffer, size); 7370 } 7371 7372 static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name, 7373 const void *value, size_t size, int flags, int type) 7374 { 7375 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7376 7377 if (strcmp(name, "") == 0) 7378 return -EINVAL; 7379 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7380 return -EOPNOTSUPP; 7381 7382 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER, 7383 name, value, size, flags); 7384 } 7385 7386 const struct xattr_handler ocfs2_xattr_user_handler = { 7387 .prefix = XATTR_USER_PREFIX, 7388 .list = ocfs2_xattr_user_list, 7389 .get = ocfs2_xattr_user_get, 7390 .set = ocfs2_xattr_user_set, 7391 }; 7392