1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * xattr.c 4 * 5 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 6 * 7 * CREDITS: 8 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 9 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 10 */ 11 12 #include <linux/capability.h> 13 #include <linux/fs.h> 14 #include <linux/types.h> 15 #include <linux/slab.h> 16 #include <linux/highmem.h> 17 #include <linux/pagemap.h> 18 #include <linux/uio.h> 19 #include <linux/sched.h> 20 #include <linux/splice.h> 21 #include <linux/mount.h> 22 #include <linux/writeback.h> 23 #include <linux/falloc.h> 24 #include <linux/sort.h> 25 #include <linux/init.h> 26 #include <linux/module.h> 27 #include <linux/string.h> 28 #include <linux/security.h> 29 30 #include <cluster/masklog.h> 31 32 #include "ocfs2.h" 33 #include "alloc.h" 34 #include "blockcheck.h" 35 #include "dlmglue.h" 36 #include "file.h" 37 #include "symlink.h" 38 #include "sysfile.h" 39 #include "inode.h" 40 #include "journal.h" 41 #include "ocfs2_fs.h" 42 #include "suballoc.h" 43 #include "uptodate.h" 44 #include "buffer_head_io.h" 45 #include "super.h" 46 #include "xattr.h" 47 #include "refcounttree.h" 48 #include "acl.h" 49 #include "ocfs2_trace.h" 50 51 struct ocfs2_xattr_def_value_root { 52 struct ocfs2_xattr_value_root xv; 53 struct ocfs2_extent_rec er; 54 }; 55 56 struct ocfs2_xattr_bucket { 57 /* The inode these xattrs are associated with */ 58 struct inode *bu_inode; 59 60 /* The actual buffers that make up the bucket */ 61 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 62 63 /* How many blocks make up one bucket for this filesystem */ 64 int bu_blocks; 65 }; 66 67 struct ocfs2_xattr_set_ctxt { 68 handle_t *handle; 69 struct ocfs2_alloc_context *meta_ac; 70 struct ocfs2_alloc_context *data_ac; 71 struct ocfs2_cached_dealloc_ctxt dealloc; 72 int set_abort; 73 }; 74 75 #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 76 #define 
OCFS2_XATTR_INLINE_SIZE 80 77 #define OCFS2_XATTR_HEADER_GAP 4 78 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 79 - sizeof(struct ocfs2_xattr_header) \ 80 - OCFS2_XATTR_HEADER_GAP) 81 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 82 - sizeof(struct ocfs2_xattr_block) \ 83 - sizeof(struct ocfs2_xattr_header) \ 84 - OCFS2_XATTR_HEADER_GAP) 85 86 static struct ocfs2_xattr_def_value_root def_xv = { 87 .xv.xr_list.l_count = cpu_to_le16(1), 88 }; 89 90 const struct xattr_handler *ocfs2_xattr_handlers[] = { 91 &ocfs2_xattr_user_handler, 92 &posix_acl_access_xattr_handler, 93 &posix_acl_default_xattr_handler, 94 &ocfs2_xattr_trusted_handler, 95 &ocfs2_xattr_security_handler, 96 NULL 97 }; 98 99 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 100 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 101 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 102 = &posix_acl_access_xattr_handler, 103 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 104 = &posix_acl_default_xattr_handler, 105 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 106 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 107 }; 108 109 struct ocfs2_xattr_info { 110 int xi_name_index; 111 const char *xi_name; 112 int xi_name_len; 113 const void *xi_value; 114 size_t xi_value_len; 115 }; 116 117 struct ocfs2_xattr_search { 118 struct buffer_head *inode_bh; 119 /* 120 * xattr_bh point to the block buffer head which has extended attribute 121 * when extended attribute in inode, xattr_bh is equal to inode_bh. 
122 */ 123 struct buffer_head *xattr_bh; 124 struct ocfs2_xattr_header *header; 125 struct ocfs2_xattr_bucket *bucket; 126 void *base; 127 void *end; 128 struct ocfs2_xattr_entry *here; 129 int not_found; 130 }; 131 132 /* Operations on struct ocfs2_xa_entry */ 133 struct ocfs2_xa_loc; 134 struct ocfs2_xa_loc_operations { 135 /* 136 * Journal functions 137 */ 138 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 139 int type); 140 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 141 142 /* 143 * Return a pointer to the appropriate buffer in loc->xl_storage 144 * at the given offset from loc->xl_header. 145 */ 146 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 147 148 /* Can we reuse the existing entry for the new value? */ 149 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 150 struct ocfs2_xattr_info *xi); 151 152 /* How much space is needed for the new value? */ 153 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 154 struct ocfs2_xattr_info *xi); 155 156 /* 157 * Return the offset of the first name+value pair. This is 158 * the start of our downward-filling free space. 159 */ 160 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 161 162 /* 163 * Remove the name+value at this location. Do whatever is 164 * appropriate with the remaining name+value pairs. 165 */ 166 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 167 168 /* Fill xl_entry with a new entry */ 169 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 170 171 /* Add name+value storage to an entry */ 172 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 173 174 /* 175 * Initialize the value buf's access and bh fields for this entry. 176 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 177 */ 178 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 179 struct ocfs2_xattr_value_buf *vb); 180 }; 181 182 /* 183 * Describes an xattr entry location. 
This is a memory structure 184 * tracking the on-disk structure. 185 */ 186 struct ocfs2_xa_loc { 187 /* This xattr belongs to this inode */ 188 struct inode *xl_inode; 189 190 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 191 struct ocfs2_xattr_header *xl_header; 192 193 /* Bytes from xl_header to the end of the storage */ 194 int xl_size; 195 196 /* 197 * The ocfs2_xattr_entry this location describes. If this is 198 * NULL, this location describes the on-disk structure where it 199 * would have been. 200 */ 201 struct ocfs2_xattr_entry *xl_entry; 202 203 /* 204 * Internal housekeeping 205 */ 206 207 /* Buffer(s) containing this entry */ 208 void *xl_storage; 209 210 /* Operations on the storage backing this location */ 211 const struct ocfs2_xa_loc_operations *xl_ops; 212 }; 213 214 /* 215 * Convenience functions to calculate how much space is needed for a 216 * given name+value pair 217 */ 218 static int namevalue_size(int name_len, uint64_t value_len) 219 { 220 if (value_len > OCFS2_XATTR_INLINE_SIZE) 221 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 222 else 223 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 224 } 225 226 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 227 { 228 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 229 } 230 231 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 232 { 233 u64 value_len = le64_to_cpu(xe->xe_value_size); 234 235 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 236 ocfs2_xattr_is_local(xe)); 237 return namevalue_size(xe->xe_name_len, value_len); 238 } 239 240 241 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 242 struct ocfs2_xattr_header *xh, 243 int index, 244 int *block_off, 245 int *new_offset); 246 247 static int ocfs2_xattr_block_find(struct inode *inode, 248 int name_index, 249 const char *name, 250 struct ocfs2_xattr_search *xs); 251 static int ocfs2_xattr_index_block_find(struct inode *inode, 252 struct 
buffer_head *root_bh, 253 int name_index, 254 const char *name, 255 struct ocfs2_xattr_search *xs); 256 257 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 258 struct buffer_head *blk_bh, 259 char *buffer, 260 size_t buffer_size); 261 262 static int ocfs2_xattr_create_index_block(struct inode *inode, 263 struct ocfs2_xattr_search *xs, 264 struct ocfs2_xattr_set_ctxt *ctxt); 265 266 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 267 struct ocfs2_xattr_info *xi, 268 struct ocfs2_xattr_search *xs, 269 struct ocfs2_xattr_set_ctxt *ctxt); 270 271 typedef int (xattr_tree_rec_func)(struct inode *inode, 272 struct buffer_head *root_bh, 273 u64 blkno, u32 cpos, u32 len, void *para); 274 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 275 struct buffer_head *root_bh, 276 xattr_tree_rec_func *rec_func, 277 void *para); 278 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 279 struct ocfs2_xattr_bucket *bucket, 280 void *para); 281 static int ocfs2_rm_xattr_cluster(struct inode *inode, 282 struct buffer_head *root_bh, 283 u64 blkno, 284 u32 cpos, 285 u32 len, 286 void *para); 287 288 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 289 u64 src_blk, u64 last_blk, u64 to_blk, 290 unsigned int start_bucket, 291 u32 *first_hash); 292 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 293 struct ocfs2_dinode *di, 294 struct ocfs2_xattr_info *xi, 295 struct ocfs2_xattr_search *xis, 296 struct ocfs2_xattr_search *xbs, 297 struct ocfs2_refcount_tree **ref_tree, 298 int *meta_need, 299 int *credits); 300 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 301 struct ocfs2_xattr_bucket *bucket, 302 int offset, 303 struct ocfs2_xattr_value_root **xv, 304 struct buffer_head **bh); 305 306 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 307 { 308 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 309 } 310 311 static inline u16 
ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 312 { 313 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 314 } 315 316 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 317 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 318 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 319 320 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 321 { 322 struct ocfs2_xattr_bucket *bucket; 323 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 324 325 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 326 327 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); 328 if (bucket) { 329 bucket->bu_inode = inode; 330 bucket->bu_blocks = blks; 331 } 332 333 return bucket; 334 } 335 336 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 337 { 338 int i; 339 340 for (i = 0; i < bucket->bu_blocks; i++) { 341 brelse(bucket->bu_bhs[i]); 342 bucket->bu_bhs[i] = NULL; 343 } 344 } 345 346 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 347 { 348 if (bucket) { 349 ocfs2_xattr_bucket_relse(bucket); 350 bucket->bu_inode = NULL; 351 kfree(bucket); 352 } 353 } 354 355 /* 356 * A bucket that has never been written to disk doesn't need to be 357 * read. We just need the buffer_heads. Don't call this for 358 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 359 * them fully. 
360 */ 361 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 362 u64 xb_blkno, int new) 363 { 364 int i, rc = 0; 365 366 for (i = 0; i < bucket->bu_blocks; i++) { 367 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 368 xb_blkno + i); 369 if (!bucket->bu_bhs[i]) { 370 rc = -ENOMEM; 371 mlog_errno(rc); 372 break; 373 } 374 375 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 376 bucket->bu_bhs[i])) { 377 if (new) 378 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 379 bucket->bu_bhs[i]); 380 else { 381 set_buffer_uptodate(bucket->bu_bhs[i]); 382 ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 383 bucket->bu_bhs[i]); 384 } 385 } 386 } 387 388 if (rc) 389 ocfs2_xattr_bucket_relse(bucket); 390 return rc; 391 } 392 393 /* Read the xattr bucket at xb_blkno */ 394 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 395 u64 xb_blkno) 396 { 397 int rc; 398 399 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 400 bucket->bu_blocks, bucket->bu_bhs, 0, 401 NULL); 402 if (!rc) { 403 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 404 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 405 bucket->bu_bhs, 406 bucket->bu_blocks, 407 &bucket_xh(bucket)->xh_check); 408 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 409 if (rc) 410 mlog_errno(rc); 411 } 412 413 if (rc) 414 ocfs2_xattr_bucket_relse(bucket); 415 return rc; 416 } 417 418 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 419 struct ocfs2_xattr_bucket *bucket, 420 int type) 421 { 422 int i, rc = 0; 423 424 for (i = 0; i < bucket->bu_blocks; i++) { 425 rc = ocfs2_journal_access(handle, 426 INODE_CACHE(bucket->bu_inode), 427 bucket->bu_bhs[i], type); 428 if (rc) { 429 mlog_errno(rc); 430 break; 431 } 432 } 433 434 return rc; 435 } 436 437 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 438 struct ocfs2_xattr_bucket *bucket) 439 { 440 int i; 441 442 
spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 443 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 444 bucket->bu_bhs, bucket->bu_blocks, 445 &bucket_xh(bucket)->xh_check); 446 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 447 448 for (i = 0; i < bucket->bu_blocks; i++) 449 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 450 } 451 452 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 453 struct ocfs2_xattr_bucket *src) 454 { 455 int i; 456 int blocksize = src->bu_inode->i_sb->s_blocksize; 457 458 BUG_ON(dest->bu_blocks != src->bu_blocks); 459 BUG_ON(dest->bu_inode != src->bu_inode); 460 461 for (i = 0; i < src->bu_blocks; i++) { 462 memcpy(bucket_block(dest, i), bucket_block(src, i), 463 blocksize); 464 } 465 } 466 467 static int ocfs2_validate_xattr_block(struct super_block *sb, 468 struct buffer_head *bh) 469 { 470 int rc; 471 struct ocfs2_xattr_block *xb = 472 (struct ocfs2_xattr_block *)bh->b_data; 473 474 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); 475 476 BUG_ON(!buffer_uptodate(bh)); 477 478 /* 479 * If the ecc fails, we return the error but otherwise 480 * leave the filesystem running. We know any error is 481 * local to this block. 
482 */ 483 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 484 if (rc) 485 return rc; 486 487 /* 488 * Errors after here are fatal 489 */ 490 491 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 492 return ocfs2_error(sb, 493 "Extended attribute block #%llu has bad signature %.*s\n", 494 (unsigned long long)bh->b_blocknr, 7, 495 xb->xb_signature); 496 } 497 498 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 499 return ocfs2_error(sb, 500 "Extended attribute block #%llu has an invalid xb_blkno of %llu\n", 501 (unsigned long long)bh->b_blocknr, 502 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 503 } 504 505 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 506 return ocfs2_error(sb, 507 "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n", 508 (unsigned long long)bh->b_blocknr, 509 le32_to_cpu(xb->xb_fs_generation)); 510 } 511 512 return 0; 513 } 514 515 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 516 struct buffer_head **bh) 517 { 518 int rc; 519 struct buffer_head *tmp = *bh; 520 521 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 522 ocfs2_validate_xattr_block); 523 524 /* If ocfs2_read_block() got us a new bh, pass it up. */ 525 if (!rc && !*bh) 526 *bh = tmp; 527 528 return rc; 529 } 530 531 static inline const char *ocfs2_xattr_prefix(int name_index) 532 { 533 const struct xattr_handler *handler = NULL; 534 535 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 536 handler = ocfs2_xattr_handler_map[name_index]; 537 return handler ? 
xattr_prefix(handler) : NULL; 538 } 539 540 static u32 ocfs2_xattr_name_hash(struct inode *inode, 541 const char *name, 542 int name_len) 543 { 544 /* Get hash value of uuid from super block */ 545 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 546 int i; 547 548 /* hash extended attribute name */ 549 for (i = 0; i < name_len; i++) { 550 hash = (hash << OCFS2_HASH_SHIFT) ^ 551 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 552 *name++; 553 } 554 555 return hash; 556 } 557 558 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 559 { 560 return namevalue_size(name_len, value_len) + 561 sizeof(struct ocfs2_xattr_entry); 562 } 563 564 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 565 { 566 return namevalue_size_xi(xi) + 567 sizeof(struct ocfs2_xattr_entry); 568 } 569 570 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 571 { 572 return namevalue_size_xe(xe) + 573 sizeof(struct ocfs2_xattr_entry); 574 } 575 576 int ocfs2_calc_security_init(struct inode *dir, 577 struct ocfs2_security_xattr_info *si, 578 int *want_clusters, 579 int *xattr_credits, 580 struct ocfs2_alloc_context **xattr_ac) 581 { 582 int ret = 0; 583 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 584 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 585 si->value_len); 586 587 /* 588 * The max space of security xattr taken inline is 589 * 256(name) + 80(value) + 16(entry) = 352 bytes, 590 * So reserve one metadata block for it is ok. 
591 */ 592 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 593 s_size > OCFS2_XATTR_FREE_IN_IBODY) { 594 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 595 if (ret) { 596 mlog_errno(ret); 597 return ret; 598 } 599 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 600 } 601 602 /* reserve clusters for xattr value which will be set in B tree*/ 603 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { 604 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 605 si->value_len); 606 607 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 608 new_clusters); 609 *want_clusters += new_clusters; 610 } 611 return ret; 612 } 613 614 int ocfs2_calc_xattr_init(struct inode *dir, 615 struct buffer_head *dir_bh, 616 umode_t mode, 617 struct ocfs2_security_xattr_info *si, 618 int *want_clusters, 619 int *xattr_credits, 620 int *want_meta) 621 { 622 int ret = 0; 623 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 624 int s_size = 0, a_size = 0, acl_len = 0, new_clusters; 625 626 if (si->enable) 627 s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 628 si->value_len); 629 630 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 631 down_read(&OCFS2_I(dir)->ip_xattr_sem); 632 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 633 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 634 "", NULL, 0); 635 up_read(&OCFS2_I(dir)->ip_xattr_sem); 636 if (acl_len > 0) { 637 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 638 if (S_ISDIR(mode)) 639 a_size <<= 1; 640 } else if (acl_len != 0 && acl_len != -ENODATA) { 641 ret = acl_len; 642 mlog_errno(ret); 643 return ret; 644 } 645 } 646 647 if (!(s_size + a_size)) 648 return ret; 649 650 /* 651 * The max space of security xattr taken inline is 652 * 256(name) + 80(value) + 16(entry) = 352 bytes, 653 * The max space of acl xattr taken inline is 654 * 80(value) + 16(entry) * 2(if directory) = 192 bytes, 655 * when blocksize = 512, may reserve one more cluser for 656 * xattr bucket, otherwise reserve one metadata block 657 * for them is ok. 
658 * If this is a new directory with inline data, 659 * we choose to reserve the entire inline area for 660 * directory contents and force an external xattr block. 661 */ 662 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 663 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || 664 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 665 *want_meta = *want_meta + 1; 666 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 667 } 668 669 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && 670 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { 671 *want_clusters += 1; 672 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); 673 } 674 675 /* 676 * reserve credits and clusters for xattrs which has large value 677 * and have to be set outside 678 */ 679 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { 680 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 681 si->value_len); 682 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 683 new_clusters); 684 *want_clusters += new_clusters; 685 } 686 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && 687 acl_len > OCFS2_XATTR_INLINE_SIZE) { 688 /* for directory, it has DEFAULT and ACCESS two types of acls */ 689 new_clusters = (S_ISDIR(mode) ? 
2 : 1) * 690 ocfs2_clusters_for_bytes(dir->i_sb, acl_len); 691 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 692 new_clusters); 693 *want_clusters += new_clusters; 694 } 695 696 return ret; 697 } 698 699 static int ocfs2_xattr_extend_allocation(struct inode *inode, 700 u32 clusters_to_add, 701 struct ocfs2_xattr_value_buf *vb, 702 struct ocfs2_xattr_set_ctxt *ctxt) 703 { 704 int status = 0, credits; 705 handle_t *handle = ctxt->handle; 706 enum ocfs2_alloc_restarted why; 707 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 708 struct ocfs2_extent_tree et; 709 710 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 711 712 while (clusters_to_add) { 713 trace_ocfs2_xattr_extend_allocation(clusters_to_add); 714 715 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 716 OCFS2_JOURNAL_ACCESS_WRITE); 717 if (status < 0) { 718 mlog_errno(status); 719 break; 720 } 721 722 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 723 status = ocfs2_add_clusters_in_btree(handle, 724 &et, 725 &logical_start, 726 clusters_to_add, 727 0, 728 ctxt->data_ac, 729 ctxt->meta_ac, 730 &why); 731 if ((status < 0) && (status != -EAGAIN)) { 732 if (status != -ENOSPC) 733 mlog_errno(status); 734 break; 735 } 736 737 ocfs2_journal_dirty(handle, vb->vb_bh); 738 739 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - 740 prev_clusters; 741 742 if (why != RESTART_NONE && clusters_to_add) { 743 /* 744 * We can only fail in case the alloc file doesn't give 745 * up enough clusters. 
746 */ 747 BUG_ON(why == RESTART_META); 748 749 credits = ocfs2_calc_extend_credits(inode->i_sb, 750 &vb->vb_xv->xr_list); 751 status = ocfs2_extend_trans(handle, credits); 752 if (status < 0) { 753 status = -ENOMEM; 754 mlog_errno(status); 755 break; 756 } 757 } 758 } 759 760 return status; 761 } 762 763 static int __ocfs2_remove_xattr_range(struct inode *inode, 764 struct ocfs2_xattr_value_buf *vb, 765 u32 cpos, u32 phys_cpos, u32 len, 766 unsigned int ext_flags, 767 struct ocfs2_xattr_set_ctxt *ctxt) 768 { 769 int ret; 770 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 771 handle_t *handle = ctxt->handle; 772 struct ocfs2_extent_tree et; 773 774 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 775 776 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 777 OCFS2_JOURNAL_ACCESS_WRITE); 778 if (ret) { 779 mlog_errno(ret); 780 goto out; 781 } 782 783 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, 784 &ctxt->dealloc); 785 if (ret) { 786 mlog_errno(ret); 787 goto out; 788 } 789 790 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 791 ocfs2_journal_dirty(handle, vb->vb_bh); 792 793 if (ext_flags & OCFS2_EXT_REFCOUNTED) 794 ret = ocfs2_decrease_refcount(inode, handle, 795 ocfs2_blocks_to_clusters(inode->i_sb, 796 phys_blkno), 797 len, ctxt->meta_ac, &ctxt->dealloc, 1); 798 else 799 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, 800 phys_blkno, len); 801 if (ret) 802 mlog_errno(ret); 803 804 out: 805 return ret; 806 } 807 808 static int ocfs2_xattr_shrink_size(struct inode *inode, 809 u32 old_clusters, 810 u32 new_clusters, 811 struct ocfs2_xattr_value_buf *vb, 812 struct ocfs2_xattr_set_ctxt *ctxt) 813 { 814 int ret = 0; 815 unsigned int ext_flags; 816 u32 trunc_len, cpos, phys_cpos, alloc_size; 817 u64 block; 818 819 if (old_clusters <= new_clusters) 820 return 0; 821 822 cpos = new_clusters; 823 trunc_len = old_clusters - new_clusters; 824 while (trunc_len) { 825 ret = ocfs2_xattr_get_clusters(inode, cpos, 
&phys_cpos, 826 &alloc_size, 827 &vb->vb_xv->xr_list, &ext_flags); 828 if (ret) { 829 mlog_errno(ret); 830 goto out; 831 } 832 833 if (alloc_size > trunc_len) 834 alloc_size = trunc_len; 835 836 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 837 phys_cpos, alloc_size, 838 ext_flags, ctxt); 839 if (ret) { 840 mlog_errno(ret); 841 goto out; 842 } 843 844 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 845 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 846 block, alloc_size); 847 cpos += alloc_size; 848 trunc_len -= alloc_size; 849 } 850 851 out: 852 return ret; 853 } 854 855 static int ocfs2_xattr_value_truncate(struct inode *inode, 856 struct ocfs2_xattr_value_buf *vb, 857 int len, 858 struct ocfs2_xattr_set_ctxt *ctxt) 859 { 860 int ret; 861 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 862 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 863 864 if (new_clusters == old_clusters) 865 return 0; 866 867 if (new_clusters > old_clusters) 868 ret = ocfs2_xattr_extend_allocation(inode, 869 new_clusters - old_clusters, 870 vb, ctxt); 871 else 872 ret = ocfs2_xattr_shrink_size(inode, 873 old_clusters, new_clusters, 874 vb, ctxt); 875 876 return ret; 877 } 878 879 static int ocfs2_xattr_list_entry(struct super_block *sb, 880 char *buffer, size_t size, 881 size_t *result, int type, 882 const char *name, int name_len) 883 { 884 char *p = buffer + *result; 885 const char *prefix; 886 int prefix_len; 887 int total_len; 888 889 switch(type) { 890 case OCFS2_XATTR_INDEX_USER: 891 if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 892 return 0; 893 break; 894 895 case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS: 896 case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT: 897 if (!(sb->s_flags & SB_POSIXACL)) 898 return 0; 899 break; 900 901 case OCFS2_XATTR_INDEX_TRUSTED: 902 if (!capable(CAP_SYS_ADMIN)) 903 return 0; 904 break; 905 } 906 907 prefix = ocfs2_xattr_prefix(type); 908 if (!prefix) 909 return 0; 910 prefix_len = strlen(prefix); 911 
total_len = prefix_len + name_len + 1; 912 *result += total_len; 913 914 /* we are just looking for how big our buffer needs to be */ 915 if (!size) 916 return 0; 917 918 if (*result > size) 919 return -ERANGE; 920 921 memcpy(p, prefix, prefix_len); 922 memcpy(p + prefix_len, name, name_len); 923 p[prefix_len + name_len] = '\0'; 924 925 return 0; 926 } 927 928 static int ocfs2_xattr_list_entries(struct inode *inode, 929 struct ocfs2_xattr_header *header, 930 char *buffer, size_t buffer_size) 931 { 932 size_t result = 0; 933 int i, type, ret; 934 const char *name; 935 936 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 937 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 938 type = ocfs2_xattr_get_type(entry); 939 name = (const char *)header + 940 le16_to_cpu(entry->xe_name_offset); 941 942 ret = ocfs2_xattr_list_entry(inode->i_sb, 943 buffer, buffer_size, 944 &result, type, name, 945 entry->xe_name_len); 946 if (ret) 947 return ret; 948 } 949 950 return result; 951 } 952 953 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 954 struct ocfs2_dinode *di) 955 { 956 struct ocfs2_xattr_header *xh; 957 int i; 958 959 xh = (struct ocfs2_xattr_header *) 960 ((void *)di + inode->i_sb->s_blocksize - 961 le16_to_cpu(di->i_xattr_inline_size)); 962 963 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 964 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 965 return 1; 966 967 return 0; 968 } 969 970 static int ocfs2_xattr_ibody_list(struct inode *inode, 971 struct ocfs2_dinode *di, 972 char *buffer, 973 size_t buffer_size) 974 { 975 struct ocfs2_xattr_header *header = NULL; 976 struct ocfs2_inode_info *oi = OCFS2_I(inode); 977 int ret = 0; 978 979 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 980 return ret; 981 982 header = (struct ocfs2_xattr_header *) 983 ((void *)di + inode->i_sb->s_blocksize - 984 le16_to_cpu(di->i_xattr_inline_size)); 985 986 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 987 988 return ret; 989 } 990 991 
static int ocfs2_xattr_block_list(struct inode *inode, 992 struct ocfs2_dinode *di, 993 char *buffer, 994 size_t buffer_size) 995 { 996 struct buffer_head *blk_bh = NULL; 997 struct ocfs2_xattr_block *xb; 998 int ret = 0; 999 1000 if (!di->i_xattr_loc) 1001 return ret; 1002 1003 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 1004 &blk_bh); 1005 if (ret < 0) { 1006 mlog_errno(ret); 1007 return ret; 1008 } 1009 1010 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1011 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1012 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 1013 ret = ocfs2_xattr_list_entries(inode, header, 1014 buffer, buffer_size); 1015 } else 1016 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1017 buffer, buffer_size); 1018 1019 brelse(blk_bh); 1020 1021 return ret; 1022 } 1023 1024 ssize_t ocfs2_listxattr(struct dentry *dentry, 1025 char *buffer, 1026 size_t size) 1027 { 1028 int ret = 0, i_ret = 0, b_ret = 0; 1029 struct buffer_head *di_bh = NULL; 1030 struct ocfs2_dinode *di = NULL; 1031 struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry)); 1032 1033 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) 1034 return -EOPNOTSUPP; 1035 1036 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1037 return ret; 1038 1039 ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0); 1040 if (ret < 0) { 1041 mlog_errno(ret); 1042 return ret; 1043 } 1044 1045 di = (struct ocfs2_dinode *)di_bh->b_data; 1046 1047 down_read(&oi->ip_xattr_sem); 1048 i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size); 1049 if (i_ret < 0) 1050 b_ret = 0; 1051 else { 1052 if (buffer) { 1053 buffer += i_ret; 1054 size -= i_ret; 1055 } 1056 b_ret = ocfs2_xattr_block_list(d_inode(dentry), di, 1057 buffer, size); 1058 if (b_ret < 0) 1059 i_ret = 0; 1060 } 1061 up_read(&oi->ip_xattr_sem); 1062 ocfs2_inode_unlock(d_inode(dentry), 0); 1063 1064 brelse(di_bh); 1065 1066 return i_ret + b_ret; 1067 } 1068 1069 static int 
ocfs2_xattr_find_entry(int name_index, 1070 const char *name, 1071 struct ocfs2_xattr_search *xs) 1072 { 1073 struct ocfs2_xattr_entry *entry; 1074 size_t name_len; 1075 int i, cmp = 1; 1076 1077 if (name == NULL) 1078 return -EINVAL; 1079 1080 name_len = strlen(name); 1081 entry = xs->here; 1082 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 1083 cmp = name_index - ocfs2_xattr_get_type(entry); 1084 if (!cmp) 1085 cmp = name_len - entry->xe_name_len; 1086 if (!cmp) 1087 cmp = memcmp(name, (xs->base + 1088 le16_to_cpu(entry->xe_name_offset)), 1089 name_len); 1090 if (cmp == 0) 1091 break; 1092 entry += 1; 1093 } 1094 xs->here = entry; 1095 1096 return cmp ? -ENODATA : 0; 1097 } 1098 1099 static int ocfs2_xattr_get_value_outside(struct inode *inode, 1100 struct ocfs2_xattr_value_root *xv, 1101 void *buffer, 1102 size_t len) 1103 { 1104 u32 cpos, p_cluster, num_clusters, bpc, clusters; 1105 u64 blkno; 1106 int i, ret = 0; 1107 size_t cplen, blocksize; 1108 struct buffer_head *bh = NULL; 1109 struct ocfs2_extent_list *el; 1110 1111 el = &xv->xr_list; 1112 clusters = le32_to_cpu(xv->xr_clusters); 1113 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1114 blocksize = inode->i_sb->s_blocksize; 1115 1116 cpos = 0; 1117 while (cpos < clusters) { 1118 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1119 &num_clusters, el, NULL); 1120 if (ret) { 1121 mlog_errno(ret); 1122 goto out; 1123 } 1124 1125 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1126 /* Copy ocfs2_xattr_value */ 1127 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1128 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1129 &bh, NULL); 1130 if (ret) { 1131 mlog_errno(ret); 1132 goto out; 1133 } 1134 1135 cplen = len >= blocksize ? 
blocksize : len; 1136 memcpy(buffer, bh->b_data, cplen); 1137 len -= cplen; 1138 buffer += cplen; 1139 1140 brelse(bh); 1141 bh = NULL; 1142 if (len == 0) 1143 break; 1144 } 1145 cpos += num_clusters; 1146 } 1147 out: 1148 return ret; 1149 } 1150 1151 static int ocfs2_xattr_ibody_get(struct inode *inode, 1152 int name_index, 1153 const char *name, 1154 void *buffer, 1155 size_t buffer_size, 1156 struct ocfs2_xattr_search *xs) 1157 { 1158 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1159 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1160 struct ocfs2_xattr_value_root *xv; 1161 size_t size; 1162 int ret = 0; 1163 1164 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 1165 return -ENODATA; 1166 1167 xs->end = (void *)di + inode->i_sb->s_blocksize; 1168 xs->header = (struct ocfs2_xattr_header *) 1169 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 1170 xs->base = (void *)xs->header; 1171 xs->here = xs->header->xh_entries; 1172 1173 ret = ocfs2_xattr_find_entry(name_index, name, xs); 1174 if (ret) 1175 return ret; 1176 size = le64_to_cpu(xs->here->xe_value_size); 1177 if (buffer) { 1178 if (size > buffer_size) 1179 return -ERANGE; 1180 if (ocfs2_xattr_is_local(xs->here)) { 1181 memcpy(buffer, (void *)xs->base + 1182 le16_to_cpu(xs->here->xe_name_offset) + 1183 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); 1184 } else { 1185 xv = (struct ocfs2_xattr_value_root *) 1186 (xs->base + le16_to_cpu( 1187 xs->here->xe_name_offset) + 1188 OCFS2_XATTR_SIZE(xs->here->xe_name_len)); 1189 ret = ocfs2_xattr_get_value_outside(inode, xv, 1190 buffer, size); 1191 if (ret < 0) { 1192 mlog_errno(ret); 1193 return ret; 1194 } 1195 } 1196 } 1197 1198 return size; 1199 } 1200 1201 static int ocfs2_xattr_block_get(struct inode *inode, 1202 int name_index, 1203 const char *name, 1204 void *buffer, 1205 size_t buffer_size, 1206 struct ocfs2_xattr_search *xs) 1207 { 1208 struct ocfs2_xattr_block *xb; 1209 struct ocfs2_xattr_value_root *xv; 1210 size_t size; 1211 
int ret = -ENODATA, name_offset, name_len, i;
	int block_off;

	xs->bucket = ocfs2_xattr_bucket_new(inode);
	if (!xs->bucket) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto cleanup;
	}

	ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
	if (ret) {
		mlog_errno(ret);
		goto cleanup;
	}

	if (xs->not_found) {
		ret = -ENODATA;
		goto cleanup;
	}

	xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
	size = le64_to_cpu(xs->here->xe_value_size);
	if (buffer) {
		ret = -ERANGE;
		if (size > buffer_size)
			goto cleanup;

		name_offset = le16_to_cpu(xs->here->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
		/* Index of the entry that was found within its header */
		i = xs->here - xs->header->xh_entries;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			/*
			 * Indexed storage: resolve which bucket block holds
			 * the name+value pair and re-base xs->base on it.
			 * name_offset is updated by the callee as well.
			 */
			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
								bucket_xh(xs->bucket),
								i,
								&block_off,
								&name_offset);
			if (ret) {
				mlog_errno(ret);
				goto cleanup;
			}
			xs->base = bucket_block(xs->bucket, block_off);
		}
		if (ocfs2_xattr_is_local(xs->here)) {
			memcpy(buffer, (void *)xs->base +
			       name_offset + name_len, size);
		} else {
			xv = (struct ocfs2_xattr_value_root *)
				(xs->base + name_offset + name_len);
			ret = ocfs2_xattr_get_value_outside(inode, xv,
							    buffer, size);
			if (ret < 0) {
				mlog_errno(ret);
				goto cleanup;
			}
		}
	}
	/* Success: report the size of the value */
	ret = size;
cleanup:
	/* Reached on every path, including an allocation failure above */
	ocfs2_xattr_bucket_free(xs->bucket);

	brelse(xs->xattr_bh);
	xs->xattr_bh = NULL;
	return ret;
}

/*
 * Read an xattr without taking any lock in this function.
 *
 * The inode body is searched first.  The external xattr storage is only
 * consulted when the inline lookup returned -ENODATA and the dinode has
 * a non-zero i_xattr_loc.
 *
 * Returns the value size on success, -EOPNOTSUPP if the filesystem does
 * not support xattrs, -ENODATA if the inode has none (OCFS2_HAS_XATTR_FL
 * clear) or the name was not found, or another negative error from the
 * lookups.
 */
int ocfs2_xattr_get_nolock(struct inode *inode,
			   struct buffer_head *di_bh,
			   int name_index,
			   const char *name,
			   void *buffer,
			   size_t buffer_size)
{
	int ret;
	struct ocfs2_dinode *di = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	/* Separate search states for the inode body and the xattr block */
	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};
	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
		return -ENODATA;

	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
				    buffer_size, &xis);
	if (ret == -ENODATA && di->i_xattr_loc)
		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
					    buffer_size, &xbs);

	return ret;
}

/* ocfs2_xattr_get()
 *
 * Copy an extended attribute into the buffer provided.
 * Buffer is NULL to compute the size of buffer required.
 *
 * Wraps ocfs2_xattr_get_nolock(): the inode lock is taken through
 * ocfs2_inode_lock_tracker() and ip_xattr_sem is held for reading
 * around the lookup.
 */
static int ocfs2_xattr_get(struct inode *inode,
			   int name_index,
			   const char *name,
			   void *buffer,
			   size_t buffer_size)
{
	int ret, had_lock;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_lock_holder oh;

	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
	if (had_lock < 0) {
		mlog_errno(had_lock);
		return had_lock;
	}
	down_read(&OCFS2_I(inode)->ip_xattr_sem);
	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
				     name, buffer, buffer_size);
	up_read(&OCFS2_I(inode)->ip_xattr_sem);

	/* had_lock is handed back so the tracker can undo what it did */
	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);

	brelse(di_bh);

	return ret;
}

/*
 * Write @value_len bytes of @value into the clusters already attached
 * to the value root in @vb, journalling every block that is touched.
 */
static int __ocfs2_xattr_set_value_outside(struct inode *inode,
					   handle_t *handle,
					   struct ocfs2_xattr_value_buf *vb,
					   const void *value,
					   int value_len)
{
	int ret = 0, i, cp_len;
	u16 blocksize = inode->i_sb->s_blocksize;
	u32 p_cluster, num_clusters;
	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
	u64 blkno;
	struct buffer_head *bh = NULL;
	unsigned int ext_flags;
	struct
ocfs2_xattr_value_root *xv = vb->vb_xv;

	/* The value tree must already be large enough for the new value */
	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));

	while (cpos < clusters) {
		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
					       &num_clusters, &xv->xr_list,
					       &ext_flags);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Refcounted extents must not be written through here */
		BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);

		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);

		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
					       &bh, NULL);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_journal_access(handle,
						   INODE_CACHE(inode),
						   bh,
						   OCFS2_JOURNAL_ACCESS_WRITE);
			if (ret < 0) {
				mlog_errno(ret);
				goto out;
			}

			cp_len = value_len > blocksize ? blocksize : value_len;
			memcpy(bh->b_data, value, cp_len);
			value_len -= cp_len;
			value += cp_len;
			/* Zero the unused tail of a partially filled block */
			if (cp_len < blocksize)
				memset(bh->b_data + cp_len, 0,
				       blocksize - cp_len);

			ocfs2_journal_dirty(handle, bh);
			brelse(bh);
			bh = NULL;

			/*
			 * XXX: do we need to empty all the following
			 * blocks in this cluster?
			 */
			if (!value_len)
				break;
		}
		cpos += num_clusters;
	}
out:
	/* bh is still held if journal access failed after the read */
	brelse(bh);

	return ret;
}

/*
 * Common space check for block and bucket storage.
 *
 * @needed_space: bytes required for the new entry; 0 means nothing to check
 * @free_start:   lowest offset currently used by a name+value pair
 * @num_entries:  number of entries currently in the header
 *
 * The free space is what remains between the end of the entry array
 * (plus OCFS2_XATTR_HEADER_GAP) and @free_start.
 *
 * Returns 0 if the space fits, -ENOSPC if it does not, and -EIO if the
 * computed free space is negative.
 */
static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
				       int num_entries)
{
	int free_space;

	if (!needed_space)
		return 0;

	free_space = free_start -
		sizeof(struct ocfs2_xattr_header) -
		(num_entries * sizeof(struct ocfs2_xattr_entry)) -
		OCFS2_XATTR_HEADER_GAP;
	if (free_space < 0)
		return -EIO;
	if (free_space < needed_space)
		return -ENOSPC;

	return 0;
}

/* Declare journal access on the storage behind loc (dispatches via xl_ops) */
static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
				   int type)
{
	return loc->xl_ops->xlo_journal_access(handle, loc, type);
}

/* Mark the storage behind loc dirty in the journal (dispatches via xl_ops) */
static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
{
	loc->xl_ops->xlo_journal_dirty(handle, loc);
}

/* Give a pointer into the storage for the given offset */
static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
{
	/* Offsets at or beyond the storage size are a caller bug */
	BUG_ON(offset >= loc->xl_size);
	return loc->xl_ops->xlo_offset_pointer(loc, offset);
}

/*
 * Wipe the name+value pair and allow the storage to reclaim it.  This
 * must be followed by either removal of the entry or a call to
 * ocfs2_xa_add_namevalue().
 */
static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
{
	loc->xl_ops->xlo_wipe_namevalue(loc);
}

/*
 * Find lowest offset to a name+value pair.  This is the start of our
 * downward-growing free space.
 */
static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
{
	return loc->xl_ops->xlo_get_free_start(loc);
}

/* Can we reuse loc->xl_entry for xi?
*/
static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
				    struct ocfs2_xattr_info *xi)
{
	/* The answer depends on the storage type; dispatch via xl_ops */
	return loc->xl_ops->xlo_can_reuse(loc, xi);
}

/* How much free space is needed to set the new value */
static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi)
{
	return loc->xl_ops->xlo_check_space(loc, xi);
}

/*
 * Add a new, empty entry for @name_hash.  The storage-specific hook
 * picks the slot and sets loc->xl_entry; this wrapper then fills in
 * the hash and a placeholder name offset.
 */
static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	loc->xl_ops->xlo_add_entry(loc, name_hash);
	loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
	/*
	 * We can't leave the new entry's xe_name_offset at zero or
	 * add_namevalue() will go nuts.  We set it to the size of our
	 * storage so that it can never be less than any other entry.
	 */
	loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
}

/*
 * Reserve the name+value region for loc->xl_entry and fill in the entry
 * fields (value size, name length, type, local flag) from @xi.  The
 * region is zeroed and the name copied in; the value is not written
 * here.
 */
static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
				   struct ocfs2_xattr_info *xi)
{
	int size = namevalue_size_xi(xi);
	int nameval_offset;
	char *nameval_buf;

	/* The storage hook picks xe_name_offset for a region of @size */
	loc->xl_ops->xlo_add_namevalue(loc, size);
	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
	loc->xl_entry->xe_name_len = xi->xi_name_len;
	ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
	/* Values up to OCFS2_XATTR_INLINE_SIZE are stored locally */
	ocfs2_xattr_set_local(loc->xl_entry,
			      xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);

	nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
	memset(nameval_buf, 0, size);
	memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
}

/*
 * Set up @vb so that it describes the external value tree of
 * loc->xl_entry: the storage hook fills in the journal access function
 * and buffer head, and vb_xv is pointed at the value root that follows
 * the padded name.
 */
static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
				    struct ocfs2_xattr_value_buf *vb)
{
	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);

	/* Value bufs are for value trees */
	BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
BUG_ON(namevalue_size_xe(loc->xl_entry) !=
	       (name_size + OCFS2_XATTR_ROOT_SIZE));

	loc->xl_ops->xlo_fill_value_buf(loc, vb);
	vb->vb_xv =
		(struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
							nameval_offset +
							name_size);
}

/*
 * Journal access for block-style storage.  The same ops serve both an
 * xattr block and the inline area of an inode, so the storage size is
 * used to tell them apart: a size equal to the space after the xattr
 * block header selects the xattr-block access function, anything else
 * the dinode one.
 */
static int ocfs2_xa_block_journal_access(handle_t *handle,
					 struct ocfs2_xa_loc *loc, int type)
{
	struct buffer_head *bh = loc->xl_storage;
	ocfs2_journal_access_func access;

	if (loc->xl_size == (bh->b_size -
			     offsetof(struct ocfs2_xattr_block,
				      xb_attrs.xb_header)))
		access = ocfs2_journal_access_xb;
	else
		access = ocfs2_journal_access_di;
	return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
}

/* Block-style storage is a single buffer head; dirty it in the journal */
static void ocfs2_xa_block_journal_dirty(handle_t *handle,
					 struct ocfs2_xa_loc *loc)
{
	struct buffer_head *bh = loc->xl_storage;

	ocfs2_journal_dirty(handle, bh);
}

/* Offsets in block-style storage are relative to the xattr header */
static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
					   int offset)
{
	return (char *)loc->xl_header + offset;
}

/* An entry in block storage is reusable only on an exact size match */
static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
				    struct ocfs2_xattr_info *xi)
{
	/*
	 * Block storage is strict.  If the sizes aren't exact, we will
	 * remove the old one and reinsert the new.
*/
	return namevalue_size_xe(loc->xl_entry) ==
		namevalue_size_xi(xi);
}

/*
 * Block storage does not track its free start in the header, so compute
 * it: the smallest xe_name_offset of any entry, or the storage size
 * when there are no entries.
 */
static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_header *xh = loc->xl_header;
	int i, count = le16_to_cpu(xh->xh_count);
	int offset, free_start = loc->xl_size;

	for (i = 0; i < count; i++) {
		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
		if (offset < free_start)
			free_start = offset;
	}

	return free_start;
}

/*
 * Check whether @xi fits in block storage.  Returns whatever
 * ocfs2_xa_check_space_helper() returns for the extra space needed.
 */
static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
				      struct ocfs2_xattr_info *xi)
{
	int count = le16_to_cpu(loc->xl_header->xh_count);
	int free_start = ocfs2_xa_get_free_start(loc);
	int needed_space = ocfs2_xi_entry_usage(xi);

	/*
	 * Block storage will reclaim the original entry before inserting
	 * the new value, so we only need the difference.  If the new
	 * entry is smaller than the old one, we don't need anything.
	 */
	if (loc->xl_entry) {
		/* Don't need space if we're reusing! */
		if (ocfs2_xa_can_reuse_entry(loc, xi))
			needed_space = 0;
		else
			needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
	}
	/* A shrinking replacement needs no additional space */
	if (needed_space < 0)
		needed_space = 0;
	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
}

/*
 * Block storage for xattrs keeps the name+value pairs compacted.  When
 * we remove one, we have to shift any that preceded it towards the end.
*/
static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
{
	int i, offset;
	int namevalue_offset, first_namevalue_offset, namevalue_size;
	struct ocfs2_xattr_entry *entry = loc->xl_entry;
	struct ocfs2_xattr_header *xh = loc->xl_header;
	int count = le16_to_cpu(xh->xh_count);

	namevalue_offset = le16_to_cpu(entry->xe_name_offset);
	namevalue_size = namevalue_size_xe(entry);
	first_namevalue_offset = ocfs2_xa_get_free_start(loc);

	/*
	 * Shift the name+value pairs: everything stored below the wiped
	 * pair moves up by its size, and the gap that opens at the old
	 * free start is zeroed.
	 */
	memmove((char *)xh + first_namevalue_offset + namevalue_size,
		(char *)xh + first_namevalue_offset,
		namevalue_offset - first_namevalue_offset);
	memset((char *)xh + first_namevalue_offset, 0, namevalue_size);

	/*
	 * Now tell xh->xh_entries about it.  The "<=" means the wiped
	 * entry's own offset is adjusted along with the ones that moved.
	 */
	for (i = 0; i < count; i++) {
		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
		if (offset <= namevalue_offset)
			le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
				     namevalue_size);
	}

	/*
	 * Note that we don't update xh_free_start or xh_name_value_len
	 * because they're not used in block-stored xattrs.
	 */
}

/*
 * Append a zeroed entry at the end of the entry array and make it
 * loc->xl_entry.  @name_hash is not used by block storage.
 */
static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	int count = le16_to_cpu(loc->xl_header->xh_count);
	loc->xl_entry = &(loc->xl_header->xh_entries[count]);
	le16_add_cpu(&loc->xl_header->xh_count, 1);
	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
}

/* Place the new name+value pair directly below the current free start */
static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
{
	int free_start = ocfs2_xa_get_free_start(loc);

	loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
}

/*
 * Fill in the journal access function and buffer head for a value that
 * lives in block-style storage.  The storage size tells an xattr block
 * from the inline area of an inode.
 */
static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
					  struct ocfs2_xattr_value_buf *vb)
{
	struct buffer_head *bh = loc->xl_storage;

	if (loc->xl_size == (bh->b_size -
			     offsetof(struct ocfs2_xattr_block,
				      xb_attrs.xb_header)))
		vb->vb_access = ocfs2_journal_access_xb;
	else
		vb->vb_access = ocfs2_journal_access_di;
	vb->vb_bh = bh;
}

/*
 * Operations for xattrs stored in blocks.  This includes inline inode
 * storage and unindexed ocfs2_xattr_blocks.
*/
static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
	.xlo_journal_access	= ocfs2_xa_block_journal_access,
	.xlo_journal_dirty	= ocfs2_xa_block_journal_dirty,
	.xlo_offset_pointer	= ocfs2_xa_block_offset_pointer,
	.xlo_check_space	= ocfs2_xa_block_check_space,
	.xlo_can_reuse		= ocfs2_xa_block_can_reuse,
	.xlo_get_free_start	= ocfs2_xa_block_get_free_start,
	.xlo_wipe_namevalue	= ocfs2_xa_block_wipe_namevalue,
	.xlo_add_entry		= ocfs2_xa_block_add_entry,
	.xlo_add_namevalue	= ocfs2_xa_block_add_namevalue,
	.xlo_fill_value_buf	= ocfs2_xa_block_fill_value_buf,
};

/* Bucket storage: journal access covers the bucket as a whole */
static int ocfs2_xa_bucket_journal_access(handle_t *handle,
					  struct ocfs2_xa_loc *loc, int type)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;

	return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
}

/* Bucket storage: mark the bucket dirty in the journal */
static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
					  struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;

	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
}

/*
 * Translate a bucket-relative offset into a pointer: the offset is
 * split into a block index within the bucket and an offset inside
 * that block.
 */
static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
					    int offset)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
	int block, block_offset;

	/* The header is at the front of the bucket */
	block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
	block_offset = offset % loc->xl_inode->i_sb->s_blocksize;

	return bucket_block(bucket, block) + block_offset;
}

/*
 * A bucket entry can be reused whenever the existing name+value region
 * is at least as large as the new one (compare the exact-match rule of
 * block storage).
 */
static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
				     struct ocfs2_xattr_info *xi)
{
	return namevalue_size_xe(loc->xl_entry) >=
		namevalue_size_xi(xi);
}

/* Buckets record their free start in the header; just read it */
static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
	return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
}

static
int ocfs2_bucket_align_free_start(struct super_block *sb,
					 int free_start, int size)
{
	/*
	 * We need to make sure that the name+value pair fits within
	 * one block.  If a pair of @size bytes ending at @free_start
	 * would begin in a different block than it ends in, round
	 * @free_start down to the start of its block.
	 */
	if (((free_start - size) >> sb->s_blocksize_bits) !=
	    ((free_start - 1) >> sb->s_blocksize_bits))
		free_start -= free_start % sb->s_blocksize;

	return free_start;
}

/*
 * Check whether @xi fits in the bucket.  Returns 0 if it does, or the
 * error from ocfs2_xa_check_space_helper() / -ENOSPC if it does not.
 */
static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
				       struct ocfs2_xattr_info *xi)
{
	int rc;
	int count = le16_to_cpu(loc->xl_header->xh_count);
	int free_start = ocfs2_xa_get_free_start(loc);
	int needed_space = ocfs2_xi_entry_usage(xi);
	int size = namevalue_size_xi(xi);
	struct super_block *sb = loc->xl_inode->i_sb;

	/*
	 * Bucket storage does not reclaim name+value pairs it cannot
	 * reuse.  They live as holes until the bucket fills, and then
	 * the bucket is defragmented.  However, the bucket can reclaim
	 * the ocfs2_xattr_entry.
	 */
	if (loc->xl_entry) {
		/* Don't need space if we're reusing! */
		if (ocfs2_xa_can_reuse_entry(loc, xi))
			needed_space = 0;
		else
			needed_space -= sizeof(struct ocfs2_xattr_entry);
	}
	BUG_ON(needed_space < 0);

	if (free_start < size) {
		/* Not even the name+value pair alone fits below free_start */
		if (needed_space)
			return -ENOSPC;
	} else {
		/*
		 * First we check if it would fit in the first place.
		 * Below, we align the free start to a block.  This may
		 * slide us below the minimum gap.  By checking unaligned
		 * first, we avoid that error.
*/
		rc = ocfs2_xa_check_space_helper(needed_space, free_start,
						 count);
		if (rc)
			return rc;
		free_start = ocfs2_bucket_align_free_start(sb, free_start,
							   size);
	}
	/* Final check, against the block-aligned free start if it was moved */
	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
}

/*
 * Buckets do not compact on wipe; only the running total of name+value
 * bytes in the header is reduced.
 */
static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
{
	le16_add_cpu(&loc->xl_header->xh_name_value_len,
		     -namevalue_size_xe(loc->xl_entry));
}

/*
 * Insert a zeroed entry into the bucket's entry array at the position
 * that keeps the array ordered by name hash, and make it loc->xl_entry.
 */
static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	struct ocfs2_xattr_header *xh = loc->xl_header;
	int count = le16_to_cpu(xh->xh_count);
	int low = 0, high = count - 1, tmp;
	struct ocfs2_xattr_entry *tmp_xe;

	/*
	 * We keep buckets sorted by name_hash, so we need to find
	 * our insert place.  This is a binary search; on exit "low" is
	 * the insertion index (an equal hash inserts at the match).
	 */
	while (low <= high && count) {
		tmp = (low + high) / 2;
		tmp_xe = &xh->xh_entries[tmp];

		if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
			low = tmp + 1;
		else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
			high = tmp - 1;
		else {
			low = tmp;
			break;
		}
	}

	/* Open a slot unless we are appending at the end */
	if (low != count)
		memmove(&xh->xh_entries[low + 1],
			&xh->xh_entries[low],
			((count - low) * sizeof(struct ocfs2_xattr_entry)));

	le16_add_cpu(&xh->xh_count, 1);
	loc->xl_entry = &xh->xh_entries[low];
	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
}

/*
 * Place a new name+value pair of @size bytes below the (block-aligned)
 * free start and record the new free start in the bucket header.
 */
static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
{
	int free_start = ocfs2_xa_get_free_start(loc);
	struct ocfs2_xattr_header *xh = loc->xl_header;
	struct super_block *sb = loc->xl_inode->i_sb;
	int nameval_offset;

	free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
	nameval_offset = free_start - size;
	loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
	xh->xh_free_start = cpu_to_le16(nameval_offset);
le16_add_cpu(&xh->xh_name_value_len, size);

}

/*
 * Fill in the journal access function and buffer head for a value whose
 * root lives in a bucket.  Only the bucket block that holds the
 * name+value pair is handed to the value code.
 */
static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
					   struct ocfs2_xattr_value_buf *vb)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
	struct super_block *sb = loc->xl_inode->i_sb;
	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	int size = namevalue_size_xe(loc->xl_entry);
	/* Index of the bucket block that contains the name+value pair */
	int block_offset = nameval_offset >> sb->s_blocksize_bits;

	/* Values are not allowed to straddle block boundaries */
	BUG_ON(block_offset !=
	       ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
	/* We expect the bucket to be filled in */
	BUG_ON(!bucket->bu_bhs[block_offset]);

	vb->vb_access = ocfs2_journal_access;
	vb->vb_bh = bucket->bu_bhs[block_offset];
}

/* Operations for xattrs stored in buckets. */
static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
	.xlo_journal_access	= ocfs2_xa_bucket_journal_access,
	.xlo_journal_dirty	= ocfs2_xa_bucket_journal_dirty,
	.xlo_offset_pointer	= ocfs2_xa_bucket_offset_pointer,
	.xlo_check_space	= ocfs2_xa_bucket_check_space,
	.xlo_can_reuse		= ocfs2_xa_bucket_can_reuse,
	.xlo_get_free_start	= ocfs2_xa_bucket_get_free_start,
	.xlo_wipe_namevalue	= ocfs2_xa_bucket_wipe_namevalue,
	.xlo_add_entry		= ocfs2_xa_bucket_add_entry,
	.xlo_add_namevalue	= ocfs2_xa_bucket_add_namevalue,
	.xlo_fill_value_buf	= ocfs2_xa_bucket_fill_value_buf,
};

/*
 * Number of clusters in the external value tree of loc->xl_entry, or 0
 * when the value is stored locally.
 */
static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_value_buf vb;

	if (ocfs2_xattr_is_local(loc->xl_entry))
		return 0;

	ocfs2_xa_fill_value_buf(loc, &vb);
	return le32_to_cpu(vb.vb_xv->xr_clusters);
}

/*
 * Resize the external value tree of loc->xl_entry to hold @bytes bytes,
 * then re-declare journal access on the whole loc.
 */
static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
				   struct ocfs2_xattr_set_ctxt *ctxt)
{
	int trunc_rc, access_rc;
	struct
ocfs2_xattr_value_buf vb; 1908 1909 ocfs2_xa_fill_value_buf(loc, &vb); 1910 trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1911 ctxt); 1912 1913 /* 1914 * The caller of ocfs2_xa_value_truncate() has already called 1915 * ocfs2_xa_journal_access on the loc. However, The truncate code 1916 * calls ocfs2_extend_trans(). This may commit the previous 1917 * transaction and open a new one. If this is a bucket, truncate 1918 * could leave only vb->vb_bh set up for journaling. Meanwhile, 1919 * the caller is expecting to dirty the entire bucket. So we must 1920 * reset the journal work. We do this even if truncate has failed, 1921 * as it could have failed after committing the extend. 1922 */ 1923 access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1924 OCFS2_JOURNAL_ACCESS_WRITE); 1925 1926 /* Errors in truncate take precedence */ 1927 return trunc_rc ? trunc_rc : access_rc; 1928 } 1929 1930 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1931 { 1932 int index, count; 1933 struct ocfs2_xattr_header *xh = loc->xl_header; 1934 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1935 1936 ocfs2_xa_wipe_namevalue(loc); 1937 loc->xl_entry = NULL; 1938 1939 le16_add_cpu(&xh->xh_count, -1); 1940 count = le16_to_cpu(xh->xh_count); 1941 1942 /* 1943 * Only zero out the entry if there are more remaining. This is 1944 * important for an empty bucket, as it keeps track of the 1945 * bucket's hash value. It doesn't hurt empty block storage. 
1946 */ 1947 if (count) { 1948 index = ((char *)entry - (char *)&xh->xh_entries) / 1949 sizeof(struct ocfs2_xattr_entry); 1950 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1951 (count - index) * sizeof(struct ocfs2_xattr_entry)); 1952 memset(&xh->xh_entries[count], 0, 1953 sizeof(struct ocfs2_xattr_entry)); 1954 } 1955 } 1956 1957 /* 1958 * If we have a problem adjusting the size of an external value during 1959 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 1960 * in an intermediate state. For example, the value may be partially 1961 * truncated. 1962 * 1963 * If the value tree hasn't changed, the extend/truncate went nowhere. 1964 * We have nothing to do. The caller can treat it as a straight error. 1965 * 1966 * If the value tree got partially truncated, we now have a corrupted 1967 * extended attribute. We're going to wipe its entry and leak the 1968 * clusters. Better to leak some storage than leave a corrupt entry. 1969 * 1970 * If the value tree grew, it obviously didn't grow enough for the 1971 * new entry. We're not going to try and reclaim those clusters either. 1972 * If there was already an external value there (orig_clusters != 0), 1973 * the new clusters are attached safely and we can just leave the old 1974 * value in place. If there was no external value there, we remove 1975 * the entry. 1976 * 1977 * This way, the xattr block we store in the journal will be consistent. 1978 * If the size change broke because of the journal, no changes will hit 1979 * disk anyway. 1980 */ 1981 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 1982 const char *what, 1983 unsigned int orig_clusters) 1984 { 1985 unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 1986 char *nameval_buf = ocfs2_xa_offset_pointer(loc, 1987 le16_to_cpu(loc->xl_entry->xe_name_offset)); 1988 1989 if (new_clusters < orig_clusters) { 1990 mlog(ML_ERROR, 1991 "Partial truncate while %s xattr %.*s. 
Leaking " 1992 "%u clusters and removing the entry\n", 1993 what, loc->xl_entry->xe_name_len, nameval_buf, 1994 orig_clusters - new_clusters); 1995 ocfs2_xa_remove_entry(loc); 1996 } else if (!orig_clusters) { 1997 mlog(ML_ERROR, 1998 "Unable to allocate an external value for xattr " 1999 "%.*s safely. Leaking %u clusters and removing the " 2000 "entry\n", 2001 loc->xl_entry->xe_name_len, nameval_buf, 2002 new_clusters - orig_clusters); 2003 ocfs2_xa_remove_entry(loc); 2004 } else if (new_clusters > orig_clusters) 2005 mlog(ML_ERROR, 2006 "Unable to grow xattr %.*s safely. %u new clusters " 2007 "have been added, but the value will not be " 2008 "modified\n", 2009 loc->xl_entry->xe_name_len, nameval_buf, 2010 new_clusters - orig_clusters); 2011 } 2012 2013 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 2014 struct ocfs2_xattr_set_ctxt *ctxt) 2015 { 2016 int rc = 0; 2017 unsigned int orig_clusters; 2018 2019 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2020 orig_clusters = ocfs2_xa_value_clusters(loc); 2021 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2022 if (rc) { 2023 mlog_errno(rc); 2024 /* 2025 * Since this is remove, we can return 0 if 2026 * ocfs2_xa_cleanup_value_truncate() is going to 2027 * wipe the entry anyway. So we check the 2028 * cluster count as well. 2029 */ 2030 if (orig_clusters != ocfs2_xa_value_clusters(loc)) 2031 rc = 0; 2032 ocfs2_xa_cleanup_value_truncate(loc, "removing", 2033 orig_clusters); 2034 if (rc) 2035 goto out; 2036 } 2037 } 2038 2039 ocfs2_xa_remove_entry(loc); 2040 2041 out: 2042 return rc; 2043 } 2044 2045 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 2046 { 2047 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 2048 char *nameval_buf; 2049 2050 nameval_buf = ocfs2_xa_offset_pointer(loc, 2051 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2052 memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 2053 } 2054 2055 /* 2056 * Take an existing entry and make it ready for the new value. 
This won't allocate space, but it may free space.  It should be ready for
 * ocfs2_xa_prepare_entry() to finish the work.
 *
 * Returns 0 on success.  On a truncate failure the error is returned
 * after ocfs2_xa_cleanup_value_truncate() has run, and the entry's
 * value size and local flag are left untouched by this function.
 */
static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi,
				struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
	unsigned int orig_clusters;
	char *nameval_buf;
	/* Is the existing / the new value stored locally? */
	int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
	int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;

	BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
	       name_size);

	nameval_buf = ocfs2_xa_offset_pointer(loc,
				le16_to_cpu(loc->xl_entry->xe_name_offset));
	if (xe_local) {
		/* Local -> anything: clear the old value bytes */
		memset(nameval_buf + name_size, 0,
		       namevalue_size_xe(loc->xl_entry) - name_size);
		if (!xi_local)
			ocfs2_xa_install_value_root(loc);
	} else {
		orig_clusters = ocfs2_xa_value_clusters(loc);
		if (xi_local) {
			/* External -> local: free the whole value tree */
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc < 0)
				mlog_errno(rc);
			else
				memset(nameval_buf + name_size, 0,
				       namevalue_size_xe(loc->xl_entry) -
				       name_size);
		} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
			   xi->xi_value_len) {
			/* External -> smaller external: shrink only */
			rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
						     ctxt);
			if (rc < 0)
				mlog_errno(rc);
		}

		if (rc) {
			ocfs2_xa_cleanup_value_truncate(loc, "reusing",
							orig_clusters);
			goto out;
		}
	}

	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
	ocfs2_xattr_set_local(loc->xl_entry, xi_local);

out:
	return rc;
}

/*
 * Prepares loc->xl_entry to receive the new xattr.  This includes
 * properly setting up the name+value pair region.  If loc->xl_entry
 * already exists, it will take care of modifying it appropriately.
 *
 * Note that this modifies the data.  You did journal_access already,
 * right?
*/
static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
				  struct ocfs2_xattr_info *xi,
				  u32 name_hash,
				  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	unsigned int orig_clusters;
	/* Only set on the reuse path; used to roll back a failed grow */
	__le64 orig_value_size = 0;

	rc = ocfs2_xa_check_space(loc, xi);
	if (rc)
		goto out;

	if (loc->xl_entry) {
		if (ocfs2_xa_can_reuse_entry(loc, xi)) {
			orig_value_size = loc->xl_entry->xe_value_size;
			rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
			if (rc)
				goto out;
			goto alloc_value;
		}

		/* Can't reuse: free any external value, then the pair */
		if (!ocfs2_xattr_is_local(loc->xl_entry)) {
			orig_clusters = ocfs2_xa_value_clusters(loc);
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc) {
				mlog_errno(rc);
				ocfs2_xa_cleanup_value_truncate(loc,
								"overwriting",
								orig_clusters);
				goto out;
			}
		}
		ocfs2_xa_wipe_namevalue(loc);
	} else
		ocfs2_xa_add_entry(loc, name_hash);

	/*
	 * If we get here, we have a blank entry.  Fill it.  We grow our
	 * name+value pair back from the end.
	 */
	ocfs2_xa_add_namevalue(loc, xi);
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		ocfs2_xa_install_value_root(loc);

alloc_value:
	/* External values need their tree sized for the new length */
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		orig_clusters = ocfs2_xa_value_clusters(loc);
		rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
		if (rc < 0) {
			ctxt->set_abort = 1;
			ocfs2_xa_cleanup_value_truncate(loc, "growing",
							orig_clusters);
			/*
			 * If we were growing an existing value,
			 * ocfs2_xa_cleanup_value_truncate() won't remove
			 * the entry.  We need to restore the original value
			 * size.
			 */
			if (loc->xl_entry) {
				BUG_ON(!orig_value_size);
				loc->xl_entry->xe_value_size = orig_value_size;
			}
			mlog_errno(rc);
		}
	}

out:
	return rc;
}

/*
 * Store the value portion of the name+value pair.
This will skip
 * values that are stored externally.  Their tree roots were set up
 * by ocfs2_xa_prepare_entry().
 *
 * NOTE(review): as written, values larger than OCFS2_XATTR_INLINE_SIZE
 * are not skipped; their data is written into the value tree through
 * __ocfs2_xattr_set_value_outside().  Only the tree root is left alone.
 */
static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi,
				struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
	char *nameval_buf;
	struct ocfs2_xattr_value_buf vb;

	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		ocfs2_xa_fill_value_buf(loc, &vb);
		rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
						     ctxt->handle, &vb,
						     xi->xi_value,
						     xi->xi_value_len);
	} else
		/* Local value: copy it in right after the padded name */
		memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);

	return rc;
}

/*
 * Set or remove the xattr described by @xi at @loc.  A NULL xi_value
 * requests removal of loc->xl_entry; otherwise the entry is prepared
 * and the value stored.  The storage is journalled for write first and
 * dirtied on every path after that, including errors.
 */
static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
			struct ocfs2_xattr_info *xi,
			struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
					      xi->xi_name_len);

	ret = ocfs2_xa_journal_access(ctxt->handle, loc,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * From here on out, everything is going to modify the buffer a
	 * little.  Errors are going to leave the xattr header in a
	 * sane state.  Thus, even with errors we dirty the sucker.
2239 */ 2240 2241 /* Don't worry, we are never called with !xi_value and !xl_entry */ 2242 if (!xi->xi_value) { 2243 ret = ocfs2_xa_remove(loc, ctxt); 2244 goto out_dirty; 2245 } 2246 2247 ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); 2248 if (ret) { 2249 if (ret != -ENOSPC) 2250 mlog_errno(ret); 2251 goto out_dirty; 2252 } 2253 2254 ret = ocfs2_xa_store_value(loc, xi, ctxt); 2255 if (ret) 2256 mlog_errno(ret); 2257 2258 out_dirty: 2259 ocfs2_xa_journal_dirty(ctxt->handle, loc); 2260 2261 out: 2262 return ret; 2263 } 2264 2265 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, 2266 struct inode *inode, 2267 struct buffer_head *bh, 2268 struct ocfs2_xattr_entry *entry) 2269 { 2270 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2271 2272 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); 2273 2274 loc->xl_inode = inode; 2275 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2276 loc->xl_storage = bh; 2277 loc->xl_entry = entry; 2278 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); 2279 loc->xl_header = 2280 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - 2281 loc->xl_size); 2282 } 2283 2284 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, 2285 struct inode *inode, 2286 struct buffer_head *bh, 2287 struct ocfs2_xattr_entry *entry) 2288 { 2289 struct ocfs2_xattr_block *xb = 2290 (struct ocfs2_xattr_block *)bh->b_data; 2291 2292 BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); 2293 2294 loc->xl_inode = inode; 2295 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2296 loc->xl_storage = bh; 2297 loc->xl_header = &(xb->xb_attrs.xb_header); 2298 loc->xl_entry = entry; 2299 loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, 2300 xb_attrs.xb_header); 2301 } 2302 2303 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, 2304 struct ocfs2_xattr_bucket *bucket, 2305 struct ocfs2_xattr_entry *entry) 2306 { 2307 loc->xl_inode = bucket->bu_inode; 2308 loc->xl_ops = 
&ocfs2_xa_bucket_loc_ops; 2309 loc->xl_storage = bucket; 2310 loc->xl_header = bucket_xh(bucket); 2311 loc->xl_entry = entry; 2312 loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; 2313 } 2314 2315 /* 2316 * In xattr remove, if it is stored outside and refcounted, we may have 2317 * the chance to split the refcount tree. So need the allocators. 2318 */ 2319 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, 2320 struct ocfs2_xattr_value_root *xv, 2321 struct ocfs2_caching_info *ref_ci, 2322 struct buffer_head *ref_root_bh, 2323 struct ocfs2_alloc_context **meta_ac, 2324 int *ref_credits) 2325 { 2326 int ret, meta_add = 0; 2327 u32 p_cluster, num_clusters; 2328 unsigned int ext_flags; 2329 2330 *ref_credits = 0; 2331 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 2332 &num_clusters, 2333 &xv->xr_list, 2334 &ext_flags); 2335 if (ret) { 2336 mlog_errno(ret); 2337 goto out; 2338 } 2339 2340 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 2341 goto out; 2342 2343 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, 2344 ref_root_bh, xv, 2345 &meta_add, ref_credits); 2346 if (ret) { 2347 mlog_errno(ret); 2348 goto out; 2349 } 2350 2351 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2352 meta_add, meta_ac); 2353 if (ret) 2354 mlog_errno(ret); 2355 2356 out: 2357 return ret; 2358 } 2359 2360 static int ocfs2_remove_value_outside(struct inode*inode, 2361 struct ocfs2_xattr_value_buf *vb, 2362 struct ocfs2_xattr_header *header, 2363 struct ocfs2_caching_info *ref_ci, 2364 struct buffer_head *ref_root_bh) 2365 { 2366 int ret = 0, i, ref_credits; 2367 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2368 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2369 void *val; 2370 2371 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 2372 2373 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 2374 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 2375 2376 if (ocfs2_xattr_is_local(entry)) 2377 continue; 2378 2379 val = (void *)header + 2380 
le16_to_cpu(entry->xe_name_offset); 2381 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2382 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2383 2384 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2385 ref_ci, ref_root_bh, 2386 &ctxt.meta_ac, 2387 &ref_credits); 2388 2389 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2390 ocfs2_remove_extent_credits(osb->sb)); 2391 if (IS_ERR(ctxt.handle)) { 2392 ret = PTR_ERR(ctxt.handle); 2393 mlog_errno(ret); 2394 break; 2395 } 2396 2397 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2398 2399 ocfs2_commit_trans(osb, ctxt.handle); 2400 if (ctxt.meta_ac) { 2401 ocfs2_free_alloc_context(ctxt.meta_ac); 2402 ctxt.meta_ac = NULL; 2403 } 2404 2405 if (ret < 0) { 2406 mlog_errno(ret); 2407 break; 2408 } 2409 2410 } 2411 2412 if (ctxt.meta_ac) 2413 ocfs2_free_alloc_context(ctxt.meta_ac); 2414 ocfs2_schedule_truncate_log_flush(osb, 1); 2415 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2416 return ret; 2417 } 2418 2419 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2420 struct buffer_head *di_bh, 2421 struct ocfs2_caching_info *ref_ci, 2422 struct buffer_head *ref_root_bh) 2423 { 2424 2425 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2426 struct ocfs2_xattr_header *header; 2427 int ret; 2428 struct ocfs2_xattr_value_buf vb = { 2429 .vb_bh = di_bh, 2430 .vb_access = ocfs2_journal_access_di, 2431 }; 2432 2433 header = (struct ocfs2_xattr_header *) 2434 ((void *)di + inode->i_sb->s_blocksize - 2435 le16_to_cpu(di->i_xattr_inline_size)); 2436 2437 ret = ocfs2_remove_value_outside(inode, &vb, header, 2438 ref_ci, ref_root_bh); 2439 2440 return ret; 2441 } 2442 2443 struct ocfs2_rm_xattr_bucket_para { 2444 struct ocfs2_caching_info *ref_ci; 2445 struct buffer_head *ref_root_bh; 2446 }; 2447 2448 static int ocfs2_xattr_block_remove(struct inode *inode, 2449 struct buffer_head *blk_bh, 2450 struct ocfs2_caching_info *ref_ci, 2451 struct buffer_head *ref_root_bh) 2452 { 2453 struct ocfs2_xattr_block 
*xb; 2454 int ret = 0; 2455 struct ocfs2_xattr_value_buf vb = { 2456 .vb_bh = blk_bh, 2457 .vb_access = ocfs2_journal_access_xb, 2458 }; 2459 struct ocfs2_rm_xattr_bucket_para args = { 2460 .ref_ci = ref_ci, 2461 .ref_root_bh = ref_root_bh, 2462 }; 2463 2464 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2465 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2466 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 2467 ret = ocfs2_remove_value_outside(inode, &vb, header, 2468 ref_ci, ref_root_bh); 2469 } else 2470 ret = ocfs2_iterate_xattr_index_block(inode, 2471 blk_bh, 2472 ocfs2_rm_xattr_cluster, 2473 &args); 2474 2475 return ret; 2476 } 2477 2478 static int ocfs2_xattr_free_block(struct inode *inode, 2479 u64 block, 2480 struct ocfs2_caching_info *ref_ci, 2481 struct buffer_head *ref_root_bh) 2482 { 2483 struct inode *xb_alloc_inode; 2484 struct buffer_head *xb_alloc_bh = NULL; 2485 struct buffer_head *blk_bh = NULL; 2486 struct ocfs2_xattr_block *xb; 2487 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2488 handle_t *handle; 2489 int ret = 0; 2490 u64 blk, bg_blkno; 2491 u16 bit; 2492 2493 ret = ocfs2_read_xattr_block(inode, block, &blk_bh); 2494 if (ret < 0) { 2495 mlog_errno(ret); 2496 goto out; 2497 } 2498 2499 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); 2500 if (ret < 0) { 2501 mlog_errno(ret); 2502 goto out; 2503 } 2504 2505 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2506 blk = le64_to_cpu(xb->xb_blkno); 2507 bit = le16_to_cpu(xb->xb_suballoc_bit); 2508 if (xb->xb_suballoc_loc) 2509 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); 2510 else 2511 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2512 2513 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2514 EXTENT_ALLOC_SYSTEM_INODE, 2515 le16_to_cpu(xb->xb_suballoc_slot)); 2516 if (!xb_alloc_inode) { 2517 ret = -ENOMEM; 2518 mlog_errno(ret); 2519 goto out; 2520 } 2521 inode_lock(xb_alloc_inode); 2522 2523 ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 
1); 2524 if (ret < 0) { 2525 mlog_errno(ret); 2526 goto out_mutex; 2527 } 2528 2529 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 2530 if (IS_ERR(handle)) { 2531 ret = PTR_ERR(handle); 2532 mlog_errno(ret); 2533 goto out_unlock; 2534 } 2535 2536 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, 2537 bit, bg_blkno, 1); 2538 if (ret < 0) 2539 mlog_errno(ret); 2540 2541 ocfs2_commit_trans(osb, handle); 2542 out_unlock: 2543 ocfs2_inode_unlock(xb_alloc_inode, 1); 2544 brelse(xb_alloc_bh); 2545 out_mutex: 2546 inode_unlock(xb_alloc_inode); 2547 iput(xb_alloc_inode); 2548 out: 2549 brelse(blk_bh); 2550 return ret; 2551 } 2552 2553 /* 2554 * ocfs2_xattr_remove() 2555 * 2556 * Free extended attribute resources associated with this inode. 2557 */ 2558 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) 2559 { 2560 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2561 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2562 struct ocfs2_refcount_tree *ref_tree = NULL; 2563 struct buffer_head *ref_root_bh = NULL; 2564 struct ocfs2_caching_info *ref_ci = NULL; 2565 handle_t *handle; 2566 int ret; 2567 2568 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2569 return 0; 2570 2571 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2572 return 0; 2573 2574 if (ocfs2_is_refcount_inode(inode)) { 2575 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), 2576 le64_to_cpu(di->i_refcount_loc), 2577 1, &ref_tree, &ref_root_bh); 2578 if (ret) { 2579 mlog_errno(ret); 2580 goto out; 2581 } 2582 ref_ci = &ref_tree->rf_ci; 2583 2584 } 2585 2586 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2587 ret = ocfs2_xattr_ibody_remove(inode, di_bh, 2588 ref_ci, ref_root_bh); 2589 if (ret < 0) { 2590 mlog_errno(ret); 2591 goto out; 2592 } 2593 } 2594 2595 if (di->i_xattr_loc) { 2596 ret = ocfs2_xattr_free_block(inode, 2597 le64_to_cpu(di->i_xattr_loc), 2598 ref_ci, ref_root_bh); 2599 if (ret < 0) { 2600 mlog_errno(ret); 2601 goto out; 2602 } 
2603 } 2604 2605 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 2606 OCFS2_INODE_UPDATE_CREDITS); 2607 if (IS_ERR(handle)) { 2608 ret = PTR_ERR(handle); 2609 mlog_errno(ret); 2610 goto out; 2611 } 2612 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 2613 OCFS2_JOURNAL_ACCESS_WRITE); 2614 if (ret) { 2615 mlog_errno(ret); 2616 goto out_commit; 2617 } 2618 2619 di->i_xattr_loc = 0; 2620 2621 spin_lock(&oi->ip_lock); 2622 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2623 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2624 spin_unlock(&oi->ip_lock); 2625 ocfs2_update_inode_fsync_trans(handle, inode, 0); 2626 2627 ocfs2_journal_dirty(handle, di_bh); 2628 out_commit: 2629 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2630 out: 2631 if (ref_tree) 2632 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); 2633 brelse(ref_root_bh); 2634 return ret; 2635 } 2636 2637 static int ocfs2_xattr_has_space_inline(struct inode *inode, 2638 struct ocfs2_dinode *di) 2639 { 2640 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2641 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; 2642 int free; 2643 2644 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) 2645 return 0; 2646 2647 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2648 struct ocfs2_inline_data *idata = &di->id2.i_data; 2649 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); 2650 } else if (ocfs2_inode_is_fast_symlink(inode)) { 2651 free = ocfs2_fast_symlink_chars(inode->i_sb) - 2652 le64_to_cpu(di->i_size); 2653 } else { 2654 struct ocfs2_extent_list *el = &di->id2.i_list; 2655 free = (le16_to_cpu(el->l_count) - 2656 le16_to_cpu(el->l_next_free_rec)) * 2657 sizeof(struct ocfs2_extent_rec); 2658 } 2659 if (free >= xattrsize) 2660 return 1; 2661 2662 return 0; 2663 } 2664 2665 /* 2666 * ocfs2_xattr_ibody_find() 2667 * 2668 * Find extended attribute in inode block and 2669 * fill search info into struct ocfs2_xattr_search. 
2670 */ 2671 static int ocfs2_xattr_ibody_find(struct inode *inode, 2672 int name_index, 2673 const char *name, 2674 struct ocfs2_xattr_search *xs) 2675 { 2676 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2677 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2678 int ret; 2679 int has_space = 0; 2680 2681 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2682 return 0; 2683 2684 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2685 down_read(&oi->ip_alloc_sem); 2686 has_space = ocfs2_xattr_has_space_inline(inode, di); 2687 up_read(&oi->ip_alloc_sem); 2688 if (!has_space) 2689 return 0; 2690 } 2691 2692 xs->xattr_bh = xs->inode_bh; 2693 xs->end = (void *)di + inode->i_sb->s_blocksize; 2694 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) 2695 xs->header = (struct ocfs2_xattr_header *) 2696 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 2697 else 2698 xs->header = (struct ocfs2_xattr_header *) 2699 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); 2700 xs->base = (void *)xs->header; 2701 xs->here = xs->header->xh_entries; 2702 2703 /* Find the named attribute. 
*/ 2704 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2705 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2706 if (ret && ret != -ENODATA) 2707 return ret; 2708 xs->not_found = ret; 2709 } 2710 2711 return 0; 2712 } 2713 2714 static int ocfs2_xattr_ibody_init(struct inode *inode, 2715 struct buffer_head *di_bh, 2716 struct ocfs2_xattr_set_ctxt *ctxt) 2717 { 2718 int ret; 2719 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2720 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2721 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2722 unsigned int xattrsize = osb->s_xattr_inline_size; 2723 2724 if (!ocfs2_xattr_has_space_inline(inode, di)) { 2725 ret = -ENOSPC; 2726 goto out; 2727 } 2728 2729 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, 2730 OCFS2_JOURNAL_ACCESS_WRITE); 2731 if (ret) { 2732 mlog_errno(ret); 2733 goto out; 2734 } 2735 2736 /* 2737 * Adjust extent record count or inline data size 2738 * to reserve space for extended attribute. 2739 */ 2740 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2741 struct ocfs2_inline_data *idata = &di->id2.i_data; 2742 le16_add_cpu(&idata->id_count, -xattrsize); 2743 } else if (!(ocfs2_inode_is_fast_symlink(inode))) { 2744 struct ocfs2_extent_list *el = &di->id2.i_list; 2745 le16_add_cpu(&el->l_count, -(xattrsize / 2746 sizeof(struct ocfs2_extent_rec))); 2747 } 2748 di->i_xattr_inline_size = cpu_to_le16(xattrsize); 2749 2750 spin_lock(&oi->ip_lock); 2751 oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; 2752 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2753 spin_unlock(&oi->ip_lock); 2754 2755 ocfs2_journal_dirty(ctxt->handle, di_bh); 2756 2757 out: 2758 return ret; 2759 } 2760 2761 /* 2762 * ocfs2_xattr_ibody_set() 2763 * 2764 * Set, replace or remove an extended attribute into inode block. 
2765 * 2766 */ 2767 static int ocfs2_xattr_ibody_set(struct inode *inode, 2768 struct ocfs2_xattr_info *xi, 2769 struct ocfs2_xattr_search *xs, 2770 struct ocfs2_xattr_set_ctxt *ctxt) 2771 { 2772 int ret; 2773 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2774 struct ocfs2_xa_loc loc; 2775 2776 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2777 return -ENOSPC; 2778 2779 down_write(&oi->ip_alloc_sem); 2780 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2781 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2782 if (ret) { 2783 if (ret != -ENOSPC) 2784 mlog_errno(ret); 2785 goto out; 2786 } 2787 } 2788 2789 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2790 xs->not_found ? NULL : xs->here); 2791 ret = ocfs2_xa_set(&loc, xi, ctxt); 2792 if (ret) { 2793 if (ret != -ENOSPC) 2794 mlog_errno(ret); 2795 goto out; 2796 } 2797 xs->here = loc.xl_entry; 2798 2799 out: 2800 up_write(&oi->ip_alloc_sem); 2801 2802 return ret; 2803 } 2804 2805 /* 2806 * ocfs2_xattr_block_find() 2807 * 2808 * Find extended attribute in external block and 2809 * fill search info into struct ocfs2_xattr_search. 
2810 */ 2811 static int ocfs2_xattr_block_find(struct inode *inode, 2812 int name_index, 2813 const char *name, 2814 struct ocfs2_xattr_search *xs) 2815 { 2816 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2817 struct buffer_head *blk_bh = NULL; 2818 struct ocfs2_xattr_block *xb; 2819 int ret = 0; 2820 2821 if (!di->i_xattr_loc) 2822 return ret; 2823 2824 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 2825 &blk_bh); 2826 if (ret < 0) { 2827 mlog_errno(ret); 2828 return ret; 2829 } 2830 2831 xs->xattr_bh = blk_bh; 2832 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2833 2834 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2835 xs->header = &xb->xb_attrs.xb_header; 2836 xs->base = (void *)xs->header; 2837 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; 2838 xs->here = xs->header->xh_entries; 2839 2840 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2841 } else 2842 ret = ocfs2_xattr_index_block_find(inode, blk_bh, 2843 name_index, 2844 name, xs); 2845 2846 if (ret && ret != -ENODATA) { 2847 xs->xattr_bh = NULL; 2848 goto cleanup; 2849 } 2850 xs->not_found = ret; 2851 return 0; 2852 cleanup: 2853 brelse(blk_bh); 2854 2855 return ret; 2856 } 2857 2858 static int ocfs2_create_xattr_block(struct inode *inode, 2859 struct buffer_head *inode_bh, 2860 struct ocfs2_xattr_set_ctxt *ctxt, 2861 int indexed, 2862 struct buffer_head **ret_bh) 2863 { 2864 int ret; 2865 u16 suballoc_bit_start; 2866 u32 num_got; 2867 u64 suballoc_loc, first_blkno; 2868 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; 2869 struct buffer_head *new_bh = NULL; 2870 struct ocfs2_xattr_block *xblk; 2871 2872 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 2873 inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); 2874 if (ret < 0) { 2875 mlog_errno(ret); 2876 goto end; 2877 } 2878 2879 ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1, 2880 &suballoc_loc, &suballoc_bit_start, 2881 &num_got, &first_blkno); 2882 if 
(ret < 0) { 2883 mlog_errno(ret); 2884 goto end; 2885 } 2886 2887 new_bh = sb_getblk(inode->i_sb, first_blkno); 2888 if (!new_bh) { 2889 ret = -ENOMEM; 2890 mlog_errno(ret); 2891 goto end; 2892 } 2893 2894 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); 2895 2896 ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), 2897 new_bh, 2898 OCFS2_JOURNAL_ACCESS_CREATE); 2899 if (ret < 0) { 2900 mlog_errno(ret); 2901 goto end; 2902 } 2903 2904 /* Initialize ocfs2_xattr_block */ 2905 xblk = (struct ocfs2_xattr_block *)new_bh->b_data; 2906 memset(xblk, 0, inode->i_sb->s_blocksize); 2907 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2908 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2909 xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc); 2910 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2911 xblk->xb_fs_generation = 2912 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation); 2913 xblk->xb_blkno = cpu_to_le64(first_blkno); 2914 if (indexed) { 2915 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2916 xr->xt_clusters = cpu_to_le32(1); 2917 xr->xt_last_eb_blk = 0; 2918 xr->xt_list.l_tree_depth = 0; 2919 xr->xt_list.l_count = cpu_to_le16( 2920 ocfs2_xattr_recs_per_xb(inode->i_sb)); 2921 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 2922 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); 2923 } 2924 ocfs2_journal_dirty(ctxt->handle, new_bh); 2925 2926 /* Add it to the inode */ 2927 di->i_xattr_loc = cpu_to_le64(first_blkno); 2928 2929 spin_lock(&OCFS2_I(inode)->ip_lock); 2930 OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 2931 di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); 2932 spin_unlock(&OCFS2_I(inode)->ip_lock); 2933 2934 ocfs2_journal_dirty(ctxt->handle, inode_bh); 2935 2936 *ret_bh = new_bh; 2937 new_bh = NULL; 2938 2939 end: 2940 brelse(new_bh); 2941 return ret; 2942 } 2943 2944 /* 2945 * ocfs2_xattr_block_set() 2946 * 2947 * Set, replace or remove an extended attribute into 
external block. 2948 * 2949 */ 2950 static int ocfs2_xattr_block_set(struct inode *inode, 2951 struct ocfs2_xattr_info *xi, 2952 struct ocfs2_xattr_search *xs, 2953 struct ocfs2_xattr_set_ctxt *ctxt) 2954 { 2955 struct buffer_head *new_bh = NULL; 2956 struct ocfs2_xattr_block *xblk = NULL; 2957 int ret; 2958 struct ocfs2_xa_loc loc; 2959 2960 if (!xs->xattr_bh) { 2961 ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt, 2962 0, &new_bh); 2963 if (ret) { 2964 mlog_errno(ret); 2965 goto end; 2966 } 2967 2968 xs->xattr_bh = new_bh; 2969 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2970 xs->header = &xblk->xb_attrs.xb_header; 2971 xs->base = (void *)xs->header; 2972 xs->end = (void *)xblk + inode->i_sb->s_blocksize; 2973 xs->here = xs->header->xh_entries; 2974 } else 2975 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2976 2977 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { 2978 ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh, 2979 xs->not_found ? NULL : xs->here); 2980 2981 ret = ocfs2_xa_set(&loc, xi, ctxt); 2982 if (!ret) 2983 xs->here = loc.xl_entry; 2984 else if ((ret != -ENOSPC) || ctxt->set_abort) 2985 goto end; 2986 else { 2987 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); 2988 if (ret) 2989 goto end; 2990 } 2991 } 2992 2993 if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED) 2994 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt); 2995 2996 end: 2997 return ret; 2998 } 2999 3000 /* Check whether the new xattr can be inserted into the inode. 
*/ 3001 static int ocfs2_xattr_can_be_in_inode(struct inode *inode, 3002 struct ocfs2_xattr_info *xi, 3003 struct ocfs2_xattr_search *xs) 3004 { 3005 struct ocfs2_xattr_entry *last; 3006 int free, i; 3007 size_t min_offs = xs->end - xs->base; 3008 3009 if (!xs->header) 3010 return 0; 3011 3012 last = xs->header->xh_entries; 3013 3014 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 3015 size_t offs = le16_to_cpu(last->xe_name_offset); 3016 if (offs < min_offs) 3017 min_offs = offs; 3018 last += 1; 3019 } 3020 3021 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; 3022 if (free < 0) 3023 return 0; 3024 3025 BUG_ON(!xs->not_found); 3026 3027 if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi))) 3028 return 1; 3029 3030 return 0; 3031 } 3032 3033 static int ocfs2_calc_xattr_set_need(struct inode *inode, 3034 struct ocfs2_dinode *di, 3035 struct ocfs2_xattr_info *xi, 3036 struct ocfs2_xattr_search *xis, 3037 struct ocfs2_xattr_search *xbs, 3038 int *clusters_need, 3039 int *meta_need, 3040 int *credits_need) 3041 { 3042 int ret = 0, old_in_xb = 0; 3043 int clusters_add = 0, meta_add = 0, credits = 0; 3044 struct buffer_head *bh = NULL; 3045 struct ocfs2_xattr_block *xb = NULL; 3046 struct ocfs2_xattr_entry *xe = NULL; 3047 struct ocfs2_xattr_value_root *xv = NULL; 3048 char *base = NULL; 3049 int name_offset, name_len = 0; 3050 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3051 xi->xi_value_len); 3052 u64 value_size; 3053 3054 /* 3055 * Calculate the clusters we need to write. 3056 * No matter whether we replace an old one or add a new one, 3057 * we need this for writing. 
3058 */ 3059 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 3060 credits += new_clusters * 3061 ocfs2_clusters_to_blocks(inode->i_sb, 1); 3062 3063 if (xis->not_found && xbs->not_found) { 3064 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3065 3066 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3067 clusters_add += new_clusters; 3068 credits += ocfs2_calc_extend_credits(inode->i_sb, 3069 &def_xv.xv.xr_list); 3070 } 3071 3072 goto meta_guess; 3073 } 3074 3075 if (!xis->not_found) { 3076 xe = xis->here; 3077 name_offset = le16_to_cpu(xe->xe_name_offset); 3078 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3079 base = xis->base; 3080 credits += OCFS2_INODE_UPDATE_CREDITS; 3081 } else { 3082 int i, block_off = 0; 3083 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3084 xe = xbs->here; 3085 name_offset = le16_to_cpu(xe->xe_name_offset); 3086 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3087 i = xbs->here - xbs->header->xh_entries; 3088 old_in_xb = 1; 3089 3090 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3091 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3092 bucket_xh(xbs->bucket), 3093 i, &block_off, 3094 &name_offset); 3095 base = bucket_block(xbs->bucket, block_off); 3096 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3097 } else { 3098 base = xbs->base; 3099 credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS; 3100 } 3101 } 3102 3103 /* 3104 * delete a xattr doesn't need metadata and cluster allocation. 3105 * so just calculate the credits and return. 3106 * 3107 * The credits for removing the value tree will be extended 3108 * by ocfs2_remove_extent itself. 3109 */ 3110 if (!xi->xi_value) { 3111 if (!ocfs2_xattr_is_local(xe)) 3112 credits += ocfs2_remove_extent_credits(inode->i_sb); 3113 3114 goto out; 3115 } 3116 3117 /* do cluster allocation guess first. 
*/ 3118 value_size = le64_to_cpu(xe->xe_value_size); 3119 3120 if (old_in_xb) { 3121 /* 3122 * In xattr set, we always try to set the xe in inode first, 3123 * so if it can be inserted into inode successfully, the old 3124 * one will be removed from the xattr block, and this xattr 3125 * will be inserted into inode as a new xattr in inode. 3126 */ 3127 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { 3128 clusters_add += new_clusters; 3129 credits += ocfs2_remove_extent_credits(inode->i_sb) + 3130 OCFS2_INODE_UPDATE_CREDITS; 3131 if (!ocfs2_xattr_is_local(xe)) 3132 credits += ocfs2_calc_extend_credits( 3133 inode->i_sb, 3134 &def_xv.xv.xr_list); 3135 goto out; 3136 } 3137 } 3138 3139 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3140 /* the new values will be stored outside. */ 3141 u32 old_clusters = 0; 3142 3143 if (!ocfs2_xattr_is_local(xe)) { 3144 old_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3145 value_size); 3146 xv = (struct ocfs2_xattr_value_root *) 3147 (base + name_offset + name_len); 3148 value_size = OCFS2_XATTR_ROOT_SIZE; 3149 } else 3150 xv = &def_xv.xv; 3151 3152 if (old_clusters >= new_clusters) { 3153 credits += ocfs2_remove_extent_credits(inode->i_sb); 3154 goto out; 3155 } else { 3156 meta_add += ocfs2_extend_meta_needed(&xv->xr_list); 3157 clusters_add += new_clusters - old_clusters; 3158 credits += ocfs2_calc_extend_credits(inode->i_sb, 3159 &xv->xr_list); 3160 if (value_size >= OCFS2_XATTR_ROOT_SIZE) 3161 goto out; 3162 } 3163 } else { 3164 /* 3165 * Now the new value will be stored inside. So if the new 3166 * value is smaller than the size of value root or the old 3167 * value, we don't need any allocation, otherwise we have 3168 * to guess metadata allocation. 3169 */ 3170 if ((ocfs2_xattr_is_local(xe) && 3171 (value_size >= xi->xi_value_len)) || 3172 (!ocfs2_xattr_is_local(xe) && 3173 OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len)) 3174 goto out; 3175 } 3176 3177 meta_guess: 3178 /* calculate metadata allocation. 
*/ 3179 if (di->i_xattr_loc) { 3180 if (!xbs->xattr_bh) { 3181 ret = ocfs2_read_xattr_block(inode, 3182 le64_to_cpu(di->i_xattr_loc), 3183 &bh); 3184 if (ret) { 3185 mlog_errno(ret); 3186 goto out; 3187 } 3188 3189 xb = (struct ocfs2_xattr_block *)bh->b_data; 3190 } else 3191 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3192 3193 /* 3194 * If there is already an xattr tree, good, we can calculate 3195 * like other b-trees. Otherwise we may have the chance of 3196 * create a tree, the credit calculation is borrowed from 3197 * ocfs2_calc_extend_credits with root_el = NULL. And the 3198 * new tree will be cluster based, so no meta is needed. 3199 */ 3200 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3201 struct ocfs2_extent_list *el = 3202 &xb->xb_attrs.xb_root.xt_list; 3203 meta_add += ocfs2_extend_meta_needed(el); 3204 credits += ocfs2_calc_extend_credits(inode->i_sb, 3205 el); 3206 } else 3207 credits += OCFS2_SUBALLOC_ALLOC + 1; 3208 3209 /* 3210 * This cluster will be used either for new bucket or for 3211 * new xattr block. 3212 * If the cluster size is the same as the bucket size, one 3213 * more is needed since we may need to extend the bucket 3214 * also. 
3215 */ 3216 clusters_add += 1; 3217 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3218 if (OCFS2_XATTR_BUCKET_SIZE == 3219 OCFS2_SB(inode->i_sb)->s_clustersize) { 3220 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3221 clusters_add += 1; 3222 } 3223 } else { 3224 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 3225 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3226 struct ocfs2_extent_list *el = &def_xv.xv.xr_list; 3227 meta_add += ocfs2_extend_meta_needed(el); 3228 credits += ocfs2_calc_extend_credits(inode->i_sb, 3229 el); 3230 } else { 3231 meta_add += 1; 3232 } 3233 } 3234 out: 3235 if (clusters_need) 3236 *clusters_need = clusters_add; 3237 if (meta_need) 3238 *meta_need = meta_add; 3239 if (credits_need) 3240 *credits_need = credits; 3241 brelse(bh); 3242 return ret; 3243 } 3244 3245 static int ocfs2_init_xattr_set_ctxt(struct inode *inode, 3246 struct ocfs2_dinode *di, 3247 struct ocfs2_xattr_info *xi, 3248 struct ocfs2_xattr_search *xis, 3249 struct ocfs2_xattr_search *xbs, 3250 struct ocfs2_xattr_set_ctxt *ctxt, 3251 int extra_meta, 3252 int *credits) 3253 { 3254 int clusters_add, meta_add, ret; 3255 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3256 3257 memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt)); 3258 3259 ocfs2_init_dealloc_ctxt(&ctxt->dealloc); 3260 3261 ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, 3262 &clusters_add, &meta_add, credits); 3263 if (ret) { 3264 mlog_errno(ret); 3265 return ret; 3266 } 3267 3268 meta_add += extra_meta; 3269 trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add, 3270 clusters_add, *credits); 3271 3272 if (meta_add) { 3273 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, 3274 &ctxt->meta_ac); 3275 if (ret) { 3276 mlog_errno(ret); 3277 goto out; 3278 } 3279 } 3280 3281 if (clusters_add) { 3282 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac); 3283 if (ret) 3284 mlog_errno(ret); 3285 } 3286 out: 3287 if (ret) { 3288 if (ctxt->meta_ac) { 3289 
ocfs2_free_alloc_context(ctxt->meta_ac); 3290 ctxt->meta_ac = NULL; 3291 } 3292 3293 /* 3294 * We cannot have an error and a non null ctxt->data_ac. 3295 */ 3296 } 3297 3298 return ret; 3299 } 3300 3301 static int __ocfs2_xattr_set_handle(struct inode *inode, 3302 struct ocfs2_dinode *di, 3303 struct ocfs2_xattr_info *xi, 3304 struct ocfs2_xattr_search *xis, 3305 struct ocfs2_xattr_search *xbs, 3306 struct ocfs2_xattr_set_ctxt *ctxt) 3307 { 3308 int ret = 0, credits, old_found; 3309 3310 if (!xi->xi_value) { 3311 /* Remove existing extended attribute */ 3312 if (!xis->not_found) 3313 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3314 else if (!xbs->not_found) 3315 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3316 } else { 3317 /* We always try to set extended attribute into inode first*/ 3318 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3319 if (!ret && !xbs->not_found) { 3320 /* 3321 * If succeed and that extended attribute existing in 3322 * external block, then we will remove it. 
3323 */ 3324 xi->xi_value = NULL; 3325 xi->xi_value_len = 0; 3326 3327 old_found = xis->not_found; 3328 xis->not_found = -ENODATA; 3329 ret = ocfs2_calc_xattr_set_need(inode, 3330 di, 3331 xi, 3332 xis, 3333 xbs, 3334 NULL, 3335 NULL, 3336 &credits); 3337 xis->not_found = old_found; 3338 if (ret) { 3339 mlog_errno(ret); 3340 goto out; 3341 } 3342 3343 ret = ocfs2_extend_trans(ctxt->handle, credits); 3344 if (ret) { 3345 mlog_errno(ret); 3346 goto out; 3347 } 3348 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3349 } else if ((ret == -ENOSPC) && !ctxt->set_abort) { 3350 if (di->i_xattr_loc && !xbs->xattr_bh) { 3351 ret = ocfs2_xattr_block_find(inode, 3352 xi->xi_name_index, 3353 xi->xi_name, xbs); 3354 if (ret) 3355 goto out; 3356 3357 old_found = xis->not_found; 3358 xis->not_found = -ENODATA; 3359 ret = ocfs2_calc_xattr_set_need(inode, 3360 di, 3361 xi, 3362 xis, 3363 xbs, 3364 NULL, 3365 NULL, 3366 &credits); 3367 xis->not_found = old_found; 3368 if (ret) { 3369 mlog_errno(ret); 3370 goto out; 3371 } 3372 3373 ret = ocfs2_extend_trans(ctxt->handle, credits); 3374 if (ret) { 3375 mlog_errno(ret); 3376 goto out; 3377 } 3378 } 3379 /* 3380 * If no space in inode, we will set extended attribute 3381 * into external block. 3382 */ 3383 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3384 if (ret) 3385 goto out; 3386 if (!xis->not_found) { 3387 /* 3388 * If succeed and that extended attribute 3389 * existing in inode, we will remove it. 3390 */ 3391 xi->xi_value = NULL; 3392 xi->xi_value_len = 0; 3393 xbs->not_found = -ENODATA; 3394 ret = ocfs2_calc_xattr_set_need(inode, 3395 di, 3396 xi, 3397 xis, 3398 xbs, 3399 NULL, 3400 NULL, 3401 &credits); 3402 if (ret) { 3403 mlog_errno(ret); 3404 goto out; 3405 } 3406 3407 ret = ocfs2_extend_trans(ctxt->handle, credits); 3408 if (ret) { 3409 mlog_errno(ret); 3410 goto out; 3411 } 3412 ret = ocfs2_xattr_ibody_set(inode, xi, 3413 xis, ctxt); 3414 } 3415 } 3416 } 3417 3418 if (!ret) { 3419 /* Update inode ctime. 
*/ 3420 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 3421 xis->inode_bh, 3422 OCFS2_JOURNAL_ACCESS_WRITE); 3423 if (ret) { 3424 mlog_errno(ret); 3425 goto out; 3426 } 3427 3428 inode->i_ctime = current_time(inode); 3429 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 3430 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 3431 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh); 3432 } 3433 out: 3434 return ret; 3435 } 3436 3437 /* 3438 * This function only called duing creating inode 3439 * for init security/acl xattrs of the new inode. 3440 * All transanction credits have been reserved in mknod. 3441 */ 3442 int ocfs2_xattr_set_handle(handle_t *handle, 3443 struct inode *inode, 3444 struct buffer_head *di_bh, 3445 int name_index, 3446 const char *name, 3447 const void *value, 3448 size_t value_len, 3449 int flags, 3450 struct ocfs2_alloc_context *meta_ac, 3451 struct ocfs2_alloc_context *data_ac) 3452 { 3453 struct ocfs2_dinode *di; 3454 int ret; 3455 3456 struct ocfs2_xattr_info xi = { 3457 .xi_name_index = name_index, 3458 .xi_name = name, 3459 .xi_name_len = strlen(name), 3460 .xi_value = value, 3461 .xi_value_len = value_len, 3462 }; 3463 3464 struct ocfs2_xattr_search xis = { 3465 .not_found = -ENODATA, 3466 }; 3467 3468 struct ocfs2_xattr_search xbs = { 3469 .not_found = -ENODATA, 3470 }; 3471 3472 struct ocfs2_xattr_set_ctxt ctxt = { 3473 .handle = handle, 3474 .meta_ac = meta_ac, 3475 .data_ac = data_ac, 3476 }; 3477 3478 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3479 return -EOPNOTSUPP; 3480 3481 /* 3482 * In extreme situation, may need xattr bucket when 3483 * block size is too small. And we have already reserved 3484 * the credits for bucket in mknod. 
3485 */ 3486 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) { 3487 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3488 if (!xbs.bucket) { 3489 mlog_errno(-ENOMEM); 3490 return -ENOMEM; 3491 } 3492 } 3493 3494 xis.inode_bh = xbs.inode_bh = di_bh; 3495 di = (struct ocfs2_dinode *)di_bh->b_data; 3496 3497 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3498 3499 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3500 if (ret) 3501 goto cleanup; 3502 if (xis.not_found) { 3503 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3504 if (ret) 3505 goto cleanup; 3506 } 3507 3508 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3509 3510 cleanup: 3511 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3512 brelse(xbs.xattr_bh); 3513 ocfs2_xattr_bucket_free(xbs.bucket); 3514 3515 return ret; 3516 } 3517 3518 /* 3519 * ocfs2_xattr_set() 3520 * 3521 * Set, replace or remove an extended attribute for this inode. 3522 * value is NULL to remove an existing extended attribute, else either 3523 * create or replace an extended attribute. 
3524 */ 3525 int ocfs2_xattr_set(struct inode *inode, 3526 int name_index, 3527 const char *name, 3528 const void *value, 3529 size_t value_len, 3530 int flags) 3531 { 3532 struct buffer_head *di_bh = NULL; 3533 struct ocfs2_dinode *di; 3534 int ret, credits, had_lock, ref_meta = 0, ref_credits = 0; 3535 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3536 struct inode *tl_inode = osb->osb_tl_inode; 3537 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; 3538 struct ocfs2_refcount_tree *ref_tree = NULL; 3539 struct ocfs2_lock_holder oh; 3540 3541 struct ocfs2_xattr_info xi = { 3542 .xi_name_index = name_index, 3543 .xi_name = name, 3544 .xi_name_len = strlen(name), 3545 .xi_value = value, 3546 .xi_value_len = value_len, 3547 }; 3548 3549 struct ocfs2_xattr_search xis = { 3550 .not_found = -ENODATA, 3551 }; 3552 3553 struct ocfs2_xattr_search xbs = { 3554 .not_found = -ENODATA, 3555 }; 3556 3557 if (!ocfs2_supports_xattr(osb)) 3558 return -EOPNOTSUPP; 3559 3560 /* 3561 * Only xbs will be used on indexed trees. xis doesn't need a 3562 * bucket. 3563 */ 3564 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3565 if (!xbs.bucket) { 3566 mlog_errno(-ENOMEM); 3567 return -ENOMEM; 3568 } 3569 3570 had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh); 3571 if (had_lock < 0) { 3572 ret = had_lock; 3573 mlog_errno(ret); 3574 goto cleanup_nolock; 3575 } 3576 xis.inode_bh = xbs.inode_bh = di_bh; 3577 di = (struct ocfs2_dinode *)di_bh->b_data; 3578 3579 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3580 /* 3581 * Scan inode and external block to find the same name 3582 * extended attribute and collect search information. 
3583 */ 3584 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3585 if (ret) 3586 goto cleanup; 3587 if (xis.not_found) { 3588 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3589 if (ret) 3590 goto cleanup; 3591 } 3592 3593 if (xis.not_found && xbs.not_found) { 3594 ret = -ENODATA; 3595 if (flags & XATTR_REPLACE) 3596 goto cleanup; 3597 ret = 0; 3598 if (!value) 3599 goto cleanup; 3600 } else { 3601 ret = -EEXIST; 3602 if (flags & XATTR_CREATE) 3603 goto cleanup; 3604 } 3605 3606 /* Check whether the value is refcounted and do some preparation. */ 3607 if (ocfs2_is_refcount_inode(inode) && 3608 (!xis.not_found || !xbs.not_found)) { 3609 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3610 &xis, &xbs, &ref_tree, 3611 &ref_meta, &ref_credits); 3612 if (ret) { 3613 mlog_errno(ret); 3614 goto cleanup; 3615 } 3616 } 3617 3618 inode_lock(tl_inode); 3619 3620 if (ocfs2_truncate_log_needs_flush(osb)) { 3621 ret = __ocfs2_flush_truncate_log(osb); 3622 if (ret < 0) { 3623 inode_unlock(tl_inode); 3624 mlog_errno(ret); 3625 goto cleanup; 3626 } 3627 } 3628 inode_unlock(tl_inode); 3629 3630 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3631 &xbs, &ctxt, ref_meta, &credits); 3632 if (ret) { 3633 mlog_errno(ret); 3634 goto cleanup; 3635 } 3636 3637 /* we need to update inode's ctime field, so add credit for it. 
*/ 3638 credits += OCFS2_INODE_UPDATE_CREDITS; 3639 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 3640 if (IS_ERR(ctxt.handle)) { 3641 ret = PTR_ERR(ctxt.handle); 3642 mlog_errno(ret); 3643 goto out_free_ac; 3644 } 3645 3646 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3647 ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0); 3648 3649 ocfs2_commit_trans(osb, ctxt.handle); 3650 3651 out_free_ac: 3652 if (ctxt.data_ac) 3653 ocfs2_free_alloc_context(ctxt.data_ac); 3654 if (ctxt.meta_ac) 3655 ocfs2_free_alloc_context(ctxt.meta_ac); 3656 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3657 ocfs2_schedule_truncate_log_flush(osb, 1); 3658 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3659 3660 cleanup: 3661 if (ref_tree) 3662 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3663 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3664 if (!value && !ret) { 3665 ret = ocfs2_try_remove_refcount_tree(inode, di_bh); 3666 if (ret) 3667 mlog_errno(ret); 3668 } 3669 ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); 3670 cleanup_nolock: 3671 brelse(di_bh); 3672 brelse(xbs.xattr_bh); 3673 ocfs2_xattr_bucket_free(xbs.bucket); 3674 3675 return ret; 3676 } 3677 3678 /* 3679 * Find the xattr extent rec which may contains name_hash. 3680 * e_cpos will be the first name hash of the xattr rec. 3681 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. 
3682 */ 3683 static int ocfs2_xattr_get_rec(struct inode *inode, 3684 u32 name_hash, 3685 u64 *p_blkno, 3686 u32 *e_cpos, 3687 u32 *num_clusters, 3688 struct ocfs2_extent_list *el) 3689 { 3690 int ret = 0, i; 3691 struct buffer_head *eb_bh = NULL; 3692 struct ocfs2_extent_block *eb; 3693 struct ocfs2_extent_rec *rec = NULL; 3694 u64 e_blkno = 0; 3695 3696 if (el->l_tree_depth) { 3697 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3698 &eb_bh); 3699 if (ret) { 3700 mlog_errno(ret); 3701 goto out; 3702 } 3703 3704 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3705 el = &eb->h_list; 3706 3707 if (el->l_tree_depth) { 3708 ret = ocfs2_error(inode->i_sb, 3709 "Inode %lu has non zero tree depth in xattr tree block %llu\n", 3710 inode->i_ino, 3711 (unsigned long long)eb_bh->b_blocknr); 3712 goto out; 3713 } 3714 } 3715 3716 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3717 rec = &el->l_recs[i]; 3718 3719 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3720 e_blkno = le64_to_cpu(rec->e_blkno); 3721 break; 3722 } 3723 } 3724 3725 if (!e_blkno) { 3726 ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n", 3727 inode->i_ino, 3728 le32_to_cpu(rec->e_cpos), 3729 ocfs2_rec_clusters(el, rec)); 3730 goto out; 3731 } 3732 3733 *p_blkno = le64_to_cpu(rec->e_blkno); 3734 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3735 if (e_cpos) 3736 *e_cpos = le32_to_cpu(rec->e_cpos); 3737 out: 3738 brelse(eb_bh); 3739 return ret; 3740 } 3741 3742 typedef int (xattr_bucket_func)(struct inode *inode, 3743 struct ocfs2_xattr_bucket *bucket, 3744 void *para); 3745 3746 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3747 struct ocfs2_xattr_bucket *bucket, 3748 int name_index, 3749 const char *name, 3750 u32 name_hash, 3751 u16 *xe_index, 3752 int *found) 3753 { 3754 int i, ret = 0, cmp = 1, block_off, new_offset; 3755 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3756 size_t name_len = strlen(name); 3757 struct 
ocfs2_xattr_entry *xe = NULL; 3758 char *xe_name; 3759 3760 /* 3761 * We don't use binary search in the bucket because there 3762 * may be multiple entries with the same name hash. 3763 */ 3764 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3765 xe = &xh->xh_entries[i]; 3766 3767 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3768 continue; 3769 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3770 break; 3771 3772 cmp = name_index - ocfs2_xattr_get_type(xe); 3773 if (!cmp) 3774 cmp = name_len - xe->xe_name_len; 3775 if (cmp) 3776 continue; 3777 3778 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3779 xh, 3780 i, 3781 &block_off, 3782 &new_offset); 3783 if (ret) { 3784 mlog_errno(ret); 3785 break; 3786 } 3787 3788 3789 xe_name = bucket_block(bucket, block_off) + new_offset; 3790 if (!memcmp(name, xe_name, name_len)) { 3791 *xe_index = i; 3792 *found = 1; 3793 ret = 0; 3794 break; 3795 } 3796 } 3797 3798 return ret; 3799 } 3800 3801 /* 3802 * Find the specified xattr entry in a series of buckets. 3803 * This series start from p_blkno and last for num_clusters. 3804 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3805 * the num of the valid buckets. 3806 * 3807 * Return the buffer_head this xattr should reside in. And if the xattr's 3808 * hash is in the gap of 2 buckets, return the lower bucket. 
3809 */ 3810 static int ocfs2_xattr_bucket_find(struct inode *inode, 3811 int name_index, 3812 const char *name, 3813 u32 name_hash, 3814 u64 p_blkno, 3815 u32 first_hash, 3816 u32 num_clusters, 3817 struct ocfs2_xattr_search *xs) 3818 { 3819 int ret, found = 0; 3820 struct ocfs2_xattr_header *xh = NULL; 3821 struct ocfs2_xattr_entry *xe = NULL; 3822 u16 index = 0; 3823 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3824 int low_bucket = 0, bucket, high_bucket; 3825 struct ocfs2_xattr_bucket *search; 3826 u64 blkno, lower_blkno = 0; 3827 3828 search = ocfs2_xattr_bucket_new(inode); 3829 if (!search) { 3830 ret = -ENOMEM; 3831 mlog_errno(ret); 3832 goto out; 3833 } 3834 3835 ret = ocfs2_read_xattr_bucket(search, p_blkno); 3836 if (ret) { 3837 mlog_errno(ret); 3838 goto out; 3839 } 3840 3841 xh = bucket_xh(search); 3842 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 3843 while (low_bucket <= high_bucket) { 3844 ocfs2_xattr_bucket_relse(search); 3845 3846 bucket = (low_bucket + high_bucket) / 2; 3847 blkno = p_blkno + bucket * blk_per_bucket; 3848 ret = ocfs2_read_xattr_bucket(search, blkno); 3849 if (ret) { 3850 mlog_errno(ret); 3851 goto out; 3852 } 3853 3854 xh = bucket_xh(search); 3855 xe = &xh->xh_entries[0]; 3856 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3857 high_bucket = bucket - 1; 3858 continue; 3859 } 3860 3861 /* 3862 * Check whether the hash of the last entry in our 3863 * bucket is larger than the search one. for an empty 3864 * bucket, the last one is also the first one. 3865 */ 3866 if (xh->xh_count) 3867 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; 3868 3869 /* record lower_blkno which may be the insert place. */ 3870 lower_blkno = blkno; 3871 3872 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3873 low_bucket = bucket + 1; 3874 continue; 3875 } 3876 3877 /* the searched xattr should reside in this bucket if exists. 
*/ 3878 ret = ocfs2_find_xe_in_bucket(inode, search, 3879 name_index, name, name_hash, 3880 &index, &found); 3881 if (ret) { 3882 mlog_errno(ret); 3883 goto out; 3884 } 3885 break; 3886 } 3887 3888 /* 3889 * Record the bucket we have found. 3890 * When the xattr's hash value is in the gap of 2 buckets, we will 3891 * always set it to the previous bucket. 3892 */ 3893 if (!lower_blkno) 3894 lower_blkno = p_blkno; 3895 3896 /* This should be in cache - we just read it during the search */ 3897 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); 3898 if (ret) { 3899 mlog_errno(ret); 3900 goto out; 3901 } 3902 3903 xs->header = bucket_xh(xs->bucket); 3904 xs->base = bucket_block(xs->bucket, 0); 3905 xs->end = xs->base + inode->i_sb->s_blocksize; 3906 3907 if (found) { 3908 xs->here = &xs->header->xh_entries[index]; 3909 trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno, 3910 name, name_index, name_hash, 3911 (unsigned long long)bucket_blkno(xs->bucket), 3912 index); 3913 } else 3914 ret = -ENODATA; 3915 3916 out: 3917 ocfs2_xattr_bucket_free(search); 3918 return ret; 3919 } 3920 3921 static int ocfs2_xattr_index_block_find(struct inode *inode, 3922 struct buffer_head *root_bh, 3923 int name_index, 3924 const char *name, 3925 struct ocfs2_xattr_search *xs) 3926 { 3927 int ret; 3928 struct ocfs2_xattr_block *xb = 3929 (struct ocfs2_xattr_block *)root_bh->b_data; 3930 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 3931 struct ocfs2_extent_list *el = &xb_root->xt_list; 3932 u64 p_blkno = 0; 3933 u32 first_hash, num_clusters = 0; 3934 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 3935 3936 if (le16_to_cpu(el->l_next_free_rec) == 0) 3937 return -ENODATA; 3938 3939 trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno, 3940 name, name_index, name_hash, 3941 (unsigned long long)root_bh->b_blocknr, 3942 -1); 3943 3944 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3945 &num_clusters, el); 3946 if (ret) { 
3947 mlog_errno(ret); 3948 goto out; 3949 } 3950 3951 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 3952 3953 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno, 3954 name, name_index, first_hash, 3955 (unsigned long long)p_blkno, 3956 num_clusters); 3957 3958 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3959 p_blkno, first_hash, num_clusters, xs); 3960 3961 out: 3962 return ret; 3963 } 3964 3965 static int ocfs2_iterate_xattr_buckets(struct inode *inode, 3966 u64 blkno, 3967 u32 clusters, 3968 xattr_bucket_func *func, 3969 void *para) 3970 { 3971 int i, ret = 0; 3972 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3973 u32 num_buckets = clusters * bpc; 3974 struct ocfs2_xattr_bucket *bucket; 3975 3976 bucket = ocfs2_xattr_bucket_new(inode); 3977 if (!bucket) { 3978 mlog_errno(-ENOMEM); 3979 return -ENOMEM; 3980 } 3981 3982 trace_ocfs2_iterate_xattr_buckets( 3983 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3984 (unsigned long long)blkno, clusters); 3985 3986 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 3987 ret = ocfs2_read_xattr_bucket(bucket, blkno); 3988 if (ret) { 3989 mlog_errno(ret); 3990 break; 3991 } 3992 3993 /* 3994 * The real bucket num in this series of blocks is stored 3995 * in the 1st bucket. 
3996 */ 3997 if (i == 0) 3998 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 3999 4000 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno, 4001 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 4002 if (func) { 4003 ret = func(inode, bucket, para); 4004 if (ret && ret != -ERANGE) 4005 mlog_errno(ret); 4006 /* Fall through to bucket_relse() */ 4007 } 4008 4009 ocfs2_xattr_bucket_relse(bucket); 4010 if (ret) 4011 break; 4012 } 4013 4014 ocfs2_xattr_bucket_free(bucket); 4015 return ret; 4016 } 4017 4018 struct ocfs2_xattr_tree_list { 4019 char *buffer; 4020 size_t buffer_size; 4021 size_t result; 4022 }; 4023 4024 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 4025 struct ocfs2_xattr_header *xh, 4026 int index, 4027 int *block_off, 4028 int *new_offset) 4029 { 4030 u16 name_offset; 4031 4032 if (index < 0 || index >= le16_to_cpu(xh->xh_count)) 4033 return -EINVAL; 4034 4035 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 4036 4037 *block_off = name_offset >> sb->s_blocksize_bits; 4038 *new_offset = name_offset % sb->s_blocksize; 4039 4040 return 0; 4041 } 4042 4043 static int ocfs2_list_xattr_bucket(struct inode *inode, 4044 struct ocfs2_xattr_bucket *bucket, 4045 void *para) 4046 { 4047 int ret = 0, type; 4048 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; 4049 int i, block_off, new_offset; 4050 const char *name; 4051 4052 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { 4053 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; 4054 type = ocfs2_xattr_get_type(entry); 4055 4056 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 4057 bucket_xh(bucket), 4058 i, 4059 &block_off, 4060 &new_offset); 4061 if (ret) 4062 break; 4063 4064 name = (const char *)bucket_block(bucket, block_off) + 4065 new_offset; 4066 ret = ocfs2_xattr_list_entry(inode->i_sb, 4067 xl->buffer, 4068 xl->buffer_size, 4069 &xl->result, 4070 type, name, 4071 
entry->xe_name_len); 4072 if (ret) 4073 break; 4074 } 4075 4076 return ret; 4077 } 4078 4079 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 4080 struct buffer_head *blk_bh, 4081 xattr_tree_rec_func *rec_func, 4082 void *para) 4083 { 4084 struct ocfs2_xattr_block *xb = 4085 (struct ocfs2_xattr_block *)blk_bh->b_data; 4086 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; 4087 int ret = 0; 4088 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 4089 u64 p_blkno = 0; 4090 4091 if (!el->l_next_free_rec || !rec_func) 4092 return 0; 4093 4094 while (name_hash > 0) { 4095 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 4096 &e_cpos, &num_clusters, el); 4097 if (ret) { 4098 mlog_errno(ret); 4099 break; 4100 } 4101 4102 ret = rec_func(inode, blk_bh, p_blkno, e_cpos, 4103 num_clusters, para); 4104 if (ret) { 4105 if (ret != -ERANGE) 4106 mlog_errno(ret); 4107 break; 4108 } 4109 4110 if (e_cpos == 0) 4111 break; 4112 4113 name_hash = e_cpos - 1; 4114 } 4115 4116 return ret; 4117 4118 } 4119 4120 static int ocfs2_list_xattr_tree_rec(struct inode *inode, 4121 struct buffer_head *root_bh, 4122 u64 blkno, u32 cpos, u32 len, void *para) 4123 { 4124 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 4125 ocfs2_list_xattr_bucket, para); 4126 } 4127 4128 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 4129 struct buffer_head *blk_bh, 4130 char *buffer, 4131 size_t buffer_size) 4132 { 4133 int ret; 4134 struct ocfs2_xattr_tree_list xl = { 4135 .buffer = buffer, 4136 .buffer_size = buffer_size, 4137 .result = 0, 4138 }; 4139 4140 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 4141 ocfs2_list_xattr_tree_rec, &xl); 4142 if (ret) { 4143 mlog_errno(ret); 4144 goto out; 4145 } 4146 4147 ret = xl.result; 4148 out: 4149 return ret; 4150 } 4151 4152 static int cmp_xe(const void *a, const void *b) 4153 { 4154 const struct ocfs2_xattr_entry *l = a, *r = b; 4155 u32 l_hash = le32_to_cpu(l->xe_name_hash); 4156 u32 r_hash = 
le32_to_cpu(r->xe_name_hash); 4157 4158 if (l_hash > r_hash) 4159 return 1; 4160 if (l_hash < r_hash) 4161 return -1; 4162 return 0; 4163 } 4164 4165 static void swap_xe(void *a, void *b, int size) 4166 { 4167 struct ocfs2_xattr_entry *l = a, *r = b, tmp; 4168 4169 tmp = *l; 4170 memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); 4171 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); 4172 } 4173 4174 /* 4175 * When the ocfs2_xattr_block is filled up, new bucket will be created 4176 * and all the xattr entries will be moved to the new bucket. 4177 * The header goes at the start of the bucket, and the names+values are 4178 * filled from the end. This is why *target starts as the last buffer. 4179 * Note: we need to sort the entries since they are not saved in order 4180 * in the ocfs2_xattr_block. 4181 */ 4182 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 4183 struct buffer_head *xb_bh, 4184 struct ocfs2_xattr_bucket *bucket) 4185 { 4186 int i, blocksize = inode->i_sb->s_blocksize; 4187 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4188 u16 offset, size, off_change; 4189 struct ocfs2_xattr_entry *xe; 4190 struct ocfs2_xattr_block *xb = 4191 (struct ocfs2_xattr_block *)xb_bh->b_data; 4192 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 4193 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 4194 u16 count = le16_to_cpu(xb_xh->xh_count); 4195 char *src = xb_bh->b_data; 4196 char *target = bucket_block(bucket, blks - 1); 4197 4198 trace_ocfs2_cp_xattr_block_to_bucket_begin( 4199 (unsigned long long)xb_bh->b_blocknr, 4200 (unsigned long long)bucket_blkno(bucket)); 4201 4202 for (i = 0; i < blks; i++) 4203 memset(bucket_block(bucket, i), 0, blocksize); 4204 4205 /* 4206 * Since the xe_name_offset is based on ocfs2_xattr_header, 4207 * there is a offset change corresponding to the change of 4208 * ocfs2_xattr_header's position. 
4209 */ 4210 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4211 xe = &xb_xh->xh_entries[count - 1]; 4212 offset = le16_to_cpu(xe->xe_name_offset) + off_change; 4213 size = blocksize - offset; 4214 4215 /* copy all the names and values. */ 4216 memcpy(target + offset, src + offset, size); 4217 4218 /* Init new header now. */ 4219 xh->xh_count = xb_xh->xh_count; 4220 xh->xh_num_buckets = cpu_to_le16(1); 4221 xh->xh_name_value_len = cpu_to_le16(size); 4222 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 4223 4224 /* copy all the entries. */ 4225 target = bucket_block(bucket, 0); 4226 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 4227 size = count * sizeof(struct ocfs2_xattr_entry); 4228 memcpy(target + offset, (char *)xb_xh + offset, size); 4229 4230 /* Change the xe offset for all the xe because of the move. */ 4231 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + 4232 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4233 for (i = 0; i < count; i++) 4234 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4235 4236 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); 4237 4238 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4239 cmp_xe, swap_xe); 4240 } 4241 4242 /* 4243 * After we move xattr from block to index btree, we have to 4244 * update ocfs2_xattr_search to the new xe and base. 4245 * 4246 * When the entry is in xattr block, xattr_bh indicates the storage place. 4247 * While if the entry is in index b-tree, "bucket" indicates the 4248 * real place of the xattr. 
4249 */ 4250 static void ocfs2_xattr_update_xattr_search(struct inode *inode, 4251 struct ocfs2_xattr_search *xs, 4252 struct buffer_head *old_bh) 4253 { 4254 char *buf = old_bh->b_data; 4255 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 4256 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 4257 int i; 4258 4259 xs->header = bucket_xh(xs->bucket); 4260 xs->base = bucket_block(xs->bucket, 0); 4261 xs->end = xs->base + inode->i_sb->s_blocksize; 4262 4263 if (xs->not_found) 4264 return; 4265 4266 i = xs->here - old_xh->xh_entries; 4267 xs->here = &xs->header->xh_entries[i]; 4268 } 4269 4270 static int ocfs2_xattr_create_index_block(struct inode *inode, 4271 struct ocfs2_xattr_search *xs, 4272 struct ocfs2_xattr_set_ctxt *ctxt) 4273 { 4274 int ret; 4275 u32 bit_off, len; 4276 u64 blkno; 4277 handle_t *handle = ctxt->handle; 4278 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4279 struct buffer_head *xb_bh = xs->xattr_bh; 4280 struct ocfs2_xattr_block *xb = 4281 (struct ocfs2_xattr_block *)xb_bh->b_data; 4282 struct ocfs2_xattr_tree_root *xr; 4283 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4284 4285 trace_ocfs2_xattr_create_index_block_begin( 4286 (unsigned long long)xb_bh->b_blocknr); 4287 4288 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4289 BUG_ON(!xs->bucket); 4290 4291 /* 4292 * XXX: 4293 * We can use this lock for now, and maybe move to a dedicated mutex 4294 * if performance becomes a problem later. 
4295 */ 4296 down_write(&oi->ip_alloc_sem); 4297 4298 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, 4299 OCFS2_JOURNAL_ACCESS_WRITE); 4300 if (ret) { 4301 mlog_errno(ret); 4302 goto out; 4303 } 4304 4305 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 4306 1, 1, &bit_off, &len); 4307 if (ret) { 4308 mlog_errno(ret); 4309 goto out; 4310 } 4311 4312 /* 4313 * The bucket may spread in many blocks, and 4314 * we will only touch the 1st block and the last block 4315 * in the whole bucket(one for entry and one for data). 4316 */ 4317 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4318 4319 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); 4320 4321 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1); 4322 if (ret) { 4323 mlog_errno(ret); 4324 goto out; 4325 } 4326 4327 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 4328 OCFS2_JOURNAL_ACCESS_CREATE); 4329 if (ret) { 4330 mlog_errno(ret); 4331 goto out; 4332 } 4333 4334 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); 4335 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 4336 4337 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); 4338 4339 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 4340 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 4341 offsetof(struct ocfs2_xattr_block, xb_attrs)); 4342 4343 xr = &xb->xb_attrs.xb_root; 4344 xr->xt_clusters = cpu_to_le32(1); 4345 xr->xt_last_eb_blk = 0; 4346 xr->xt_list.l_tree_depth = 0; 4347 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); 4348 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 4349 4350 xr->xt_list.l_recs[0].e_cpos = 0; 4351 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 4352 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 4353 4354 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 4355 4356 ocfs2_journal_dirty(handle, xb_bh); 4357 4358 out: 4359 up_write(&oi->ip_alloc_sem); 4360 4361 return ret; 4362 } 4363 4364 static int 
cmp_xe_offset(const void *a, const void *b) 4365 { 4366 const struct ocfs2_xattr_entry *l = a, *r = b; 4367 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4368 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4369 4370 if (l_name_offset < r_name_offset) 4371 return 1; 4372 if (l_name_offset > r_name_offset) 4373 return -1; 4374 return 0; 4375 } 4376 4377 /* 4378 * defrag a xattr bucket if we find that the bucket has some 4379 * holes beteen name/value pairs. 4380 * We will move all the name/value pairs to the end of the bucket 4381 * so that we can spare some space for insertion. 4382 */ 4383 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4384 handle_t *handle, 4385 struct ocfs2_xattr_bucket *bucket) 4386 { 4387 int ret, i; 4388 size_t end, offset, len; 4389 struct ocfs2_xattr_header *xh; 4390 char *entries, *buf, *bucket_buf = NULL; 4391 u64 blkno = bucket_blkno(bucket); 4392 u16 xh_free_start; 4393 size_t blocksize = inode->i_sb->s_blocksize; 4394 struct ocfs2_xattr_entry *xe; 4395 4396 /* 4397 * In order to make the operation more efficient and generic, 4398 * we copy all the blocks into a contiguous memory and do the 4399 * defragment there, so if anything is error, we will not touch 4400 * the real block. 
4401 */ 4402 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4403 if (!bucket_buf) { 4404 ret = -EIO; 4405 goto out; 4406 } 4407 4408 buf = bucket_buf; 4409 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4410 memcpy(buf, bucket_block(bucket, i), blocksize); 4411 4412 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4413 OCFS2_JOURNAL_ACCESS_WRITE); 4414 if (ret < 0) { 4415 mlog_errno(ret); 4416 goto out; 4417 } 4418 4419 xh = (struct ocfs2_xattr_header *)bucket_buf; 4420 entries = (char *)xh->xh_entries; 4421 xh_free_start = le16_to_cpu(xh->xh_free_start); 4422 4423 trace_ocfs2_defrag_xattr_bucket( 4424 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4425 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4426 4427 /* 4428 * sort all the entries by their offset. 4429 * the largest will be the first, so that we can 4430 * move them to the end one by one. 4431 */ 4432 sort(entries, le16_to_cpu(xh->xh_count), 4433 sizeof(struct ocfs2_xattr_entry), 4434 cmp_xe_offset, swap_xe); 4435 4436 /* Move all name/values to the end of the bucket. */ 4437 xe = xh->xh_entries; 4438 end = OCFS2_XATTR_BUCKET_SIZE; 4439 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4440 offset = le16_to_cpu(xe->xe_name_offset); 4441 len = namevalue_size_xe(xe); 4442 4443 /* 4444 * We must make sure that the name/value pair 4445 * exist in the same block. So adjust end to 4446 * the previous block end if needed. 
4447 */ 4448 if (((end - len) / blocksize != 4449 (end - 1) / blocksize)) 4450 end = end - end % blocksize; 4451 4452 if (end > offset + len) { 4453 memmove(bucket_buf + end - len, 4454 bucket_buf + offset, len); 4455 xe->xe_name_offset = cpu_to_le16(end - len); 4456 } 4457 4458 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4459 "bucket %llu\n", (unsigned long long)blkno); 4460 4461 end -= len; 4462 } 4463 4464 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4465 "bucket %llu\n", (unsigned long long)blkno); 4466 4467 if (xh_free_start == end) 4468 goto out; 4469 4470 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4471 xh->xh_free_start = cpu_to_le16(end); 4472 4473 /* sort the entries by their name_hash. */ 4474 sort(entries, le16_to_cpu(xh->xh_count), 4475 sizeof(struct ocfs2_xattr_entry), 4476 cmp_xe, swap_xe); 4477 4478 buf = bucket_buf; 4479 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4480 memcpy(bucket_block(bucket, i), buf, blocksize); 4481 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4482 4483 out: 4484 kfree(bucket_buf); 4485 return ret; 4486 } 4487 4488 /* 4489 * prev_blkno points to the start of an existing extent. new_blkno 4490 * points to a newly allocated extent. Because we know each of our 4491 * clusters contains more than bucket, we can easily split one cluster 4492 * at a bucket boundary. So we take the last cluster of the existing 4493 * extent and split it down the middle. We move the last half of the 4494 * buckets in the last cluster of the existing extent over to the new 4495 * extent. 4496 * 4497 * first_bh is the buffer at prev_blkno so we can update the existing 4498 * extent's bucket count. header_bh is the bucket were we were hoping 4499 * to insert our xattr. If the bucket move places the target in the new 4500 * extent, we'll update first_bh and header_bh after modifying the old 4501 * extent. 
4502 * 4503 * first_hash will be set as the 1st xe's name_hash in the new extent. 4504 */ 4505 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 4506 handle_t *handle, 4507 struct ocfs2_xattr_bucket *first, 4508 struct ocfs2_xattr_bucket *target, 4509 u64 new_blkno, 4510 u32 num_clusters, 4511 u32 *first_hash) 4512 { 4513 int ret; 4514 struct super_block *sb = inode->i_sb; 4515 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); 4516 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); 4517 int to_move = num_buckets / 2; 4518 u64 src_blkno; 4519 u64 last_cluster_blkno = bucket_blkno(first) + 4520 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); 4521 4522 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4523 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4524 4525 trace_ocfs2_mv_xattr_bucket_cross_cluster( 4526 (unsigned long long)last_cluster_blkno, 4527 (unsigned long long)new_blkno); 4528 4529 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4530 last_cluster_blkno, new_blkno, 4531 to_move, first_hash); 4532 if (ret) { 4533 mlog_errno(ret); 4534 goto out; 4535 } 4536 4537 /* This is the first bucket that got moved */ 4538 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); 4539 4540 /* 4541 * If the target bucket was part of the moved buckets, we need to 4542 * update first and target. 4543 */ 4544 if (bucket_blkno(target) >= src_blkno) { 4545 /* Find the block for the new target bucket */ 4546 src_blkno = new_blkno + 4547 (bucket_blkno(target) - src_blkno); 4548 4549 ocfs2_xattr_bucket_relse(first); 4550 ocfs2_xattr_bucket_relse(target); 4551 4552 /* 4553 * These shouldn't fail - the buffers are in the 4554 * journal from ocfs2_cp_xattr_bucket(). 
4555 */ 4556 ret = ocfs2_read_xattr_bucket(first, new_blkno); 4557 if (ret) { 4558 mlog_errno(ret); 4559 goto out; 4560 } 4561 ret = ocfs2_read_xattr_bucket(target, src_blkno); 4562 if (ret) 4563 mlog_errno(ret); 4564 4565 } 4566 4567 out: 4568 return ret; 4569 } 4570 4571 /* 4572 * Find the suitable pos when we divide a bucket into 2. 4573 * We have to make sure the xattrs with the same hash value exist 4574 * in the same bucket. 4575 * 4576 * If this ocfs2_xattr_header covers more than one hash value, find a 4577 * place where the hash value changes. Try to find the most even split. 4578 * The most common case is that all entries have different hash values, 4579 * and the first check we make will find a place to split. 4580 */ 4581 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) 4582 { 4583 struct ocfs2_xattr_entry *entries = xh->xh_entries; 4584 int count = le16_to_cpu(xh->xh_count); 4585 int delta, middle = count / 2; 4586 4587 /* 4588 * We start at the middle. Each step gets farther away in both 4589 * directions. We therefore hit the change in hash value 4590 * nearest to the middle. Note that this loop does not execute for 4591 * count < 2. 4592 */ 4593 for (delta = 0; delta < middle; delta++) { 4594 /* Let's check delta earlier than middle */ 4595 if (cmp_xe(&entries[middle - delta - 1], 4596 &entries[middle - delta])) 4597 return middle - delta; 4598 4599 /* For even counts, don't walk off the end */ 4600 if ((middle + delta + 1) == count) 4601 continue; 4602 4603 /* Now try delta past middle */ 4604 if (cmp_xe(&entries[middle + delta], 4605 &entries[middle + delta + 1])) 4606 return middle + delta + 1; 4607 } 4608 4609 /* Every entry had the same hash */ 4610 return count; 4611 } 4612 4613 /* 4614 * Move some xattrs in old bucket(blk) to new bucket(new_blk). 4615 * first_hash will record the 1st hash of the new bucket. 4616 * 4617 * Normally half of the xattrs will be moved. 
But we have to make 4618 * sure that the xattrs with the same hash value are stored in the 4619 * same bucket. If all the xattrs in this bucket have the same hash 4620 * value, the new bucket will be initialized as an empty one and the 4621 * first_hash will be initialized as (hash_value+1). 4622 */ 4623 static int ocfs2_divide_xattr_bucket(struct inode *inode, 4624 handle_t *handle, 4625 u64 blk, 4626 u64 new_blk, 4627 u32 *first_hash, 4628 int new_bucket_head) 4629 { 4630 int ret, i; 4631 int count, start, len, name_value_len = 0, name_offset = 0; 4632 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4633 struct ocfs2_xattr_header *xh; 4634 struct ocfs2_xattr_entry *xe; 4635 int blocksize = inode->i_sb->s_blocksize; 4636 4637 trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk, 4638 (unsigned long long)new_blk); 4639 4640 s_bucket = ocfs2_xattr_bucket_new(inode); 4641 t_bucket = ocfs2_xattr_bucket_new(inode); 4642 if (!s_bucket || !t_bucket) { 4643 ret = -ENOMEM; 4644 mlog_errno(ret); 4645 goto out; 4646 } 4647 4648 ret = ocfs2_read_xattr_bucket(s_bucket, blk); 4649 if (ret) { 4650 mlog_errno(ret); 4651 goto out; 4652 } 4653 4654 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, 4655 OCFS2_JOURNAL_ACCESS_WRITE); 4656 if (ret) { 4657 mlog_errno(ret); 4658 goto out; 4659 } 4660 4661 /* 4662 * Even if !new_bucket_head, we're overwriting t_bucket. Thus, 4663 * there's no need to read it. 4664 */ 4665 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head); 4666 if (ret) { 4667 mlog_errno(ret); 4668 goto out; 4669 } 4670 4671 /* 4672 * Hey, if we're overwriting t_bucket, what difference does 4673 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the 4674 * same part of ocfs2_cp_xattr_bucket(). 4675 */ 4676 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4677 new_bucket_head ? 
4678 OCFS2_JOURNAL_ACCESS_CREATE : 4679 OCFS2_JOURNAL_ACCESS_WRITE); 4680 if (ret) { 4681 mlog_errno(ret); 4682 goto out; 4683 } 4684 4685 xh = bucket_xh(s_bucket); 4686 count = le16_to_cpu(xh->xh_count); 4687 start = ocfs2_xattr_find_divide_pos(xh); 4688 4689 if (start == count) { 4690 xe = &xh->xh_entries[start-1]; 4691 4692 /* 4693 * initialized a new empty bucket here. 4694 * The hash value is set as one larger than 4695 * that of the last entry in the previous bucket. 4696 */ 4697 for (i = 0; i < t_bucket->bu_blocks; i++) 4698 memset(bucket_block(t_bucket, i), 0, blocksize); 4699 4700 xh = bucket_xh(t_bucket); 4701 xh->xh_free_start = cpu_to_le16(blocksize); 4702 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 4703 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 4704 4705 goto set_num_buckets; 4706 } 4707 4708 /* copy the whole bucket to the new first. */ 4709 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4710 4711 /* update the new bucket. */ 4712 xh = bucket_xh(t_bucket); 4713 4714 /* 4715 * Calculate the total name/value len and xh_free_start for 4716 * the old bucket first. 4717 */ 4718 name_offset = OCFS2_XATTR_BUCKET_SIZE; 4719 name_value_len = 0; 4720 for (i = 0; i < start; i++) { 4721 xe = &xh->xh_entries[i]; 4722 name_value_len += namevalue_size_xe(xe); 4723 if (le16_to_cpu(xe->xe_name_offset) < name_offset) 4724 name_offset = le16_to_cpu(xe->xe_name_offset); 4725 } 4726 4727 /* 4728 * Now begin the modification to the new bucket. 4729 * 4730 * In the new bucket, We just move the xattr entry to the beginning 4731 * and don't touch the name/value. So there will be some holes in the 4732 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is 4733 * called. 
4734 */ 4735 xe = &xh->xh_entries[start]; 4736 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4737 trace_ocfs2_divide_xattr_bucket_move(len, 4738 (int)((char *)xe - (char *)xh), 4739 (int)((char *)xh->xh_entries - (char *)xh)); 4740 memmove((char *)xh->xh_entries, (char *)xe, len); 4741 xe = &xh->xh_entries[count - start]; 4742 len = sizeof(struct ocfs2_xattr_entry) * start; 4743 memset((char *)xe, 0, len); 4744 4745 le16_add_cpu(&xh->xh_count, -start); 4746 le16_add_cpu(&xh->xh_name_value_len, -name_value_len); 4747 4748 /* Calculate xh_free_start for the new bucket. */ 4749 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 4750 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 4751 xe = &xh->xh_entries[i]; 4752 if (le16_to_cpu(xe->xe_name_offset) < 4753 le16_to_cpu(xh->xh_free_start)) 4754 xh->xh_free_start = xe->xe_name_offset; 4755 } 4756 4757 set_num_buckets: 4758 /* set xh->xh_num_buckets for the new xh. */ 4759 if (new_bucket_head) 4760 xh->xh_num_buckets = cpu_to_le16(1); 4761 else 4762 xh->xh_num_buckets = 0; 4763 4764 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4765 4766 /* store the first_hash of the new bucket. */ 4767 if (first_hash) 4768 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4769 4770 /* 4771 * Now only update the 1st block of the old bucket. If we 4772 * just added a new empty bucket, there is no need to modify 4773 * it. 4774 */ 4775 if (start == count) 4776 goto out; 4777 4778 xh = bucket_xh(s_bucket); 4779 memset(&xh->xh_entries[start], 0, 4780 sizeof(struct ocfs2_xattr_entry) * (count - start)); 4781 xh->xh_count = cpu_to_le16(start); 4782 xh->xh_free_start = cpu_to_le16(name_offset); 4783 xh->xh_name_value_len = cpu_to_le16(name_value_len); 4784 4785 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); 4786 4787 out: 4788 ocfs2_xattr_bucket_free(s_bucket); 4789 ocfs2_xattr_bucket_free(t_bucket); 4790 4791 return ret; 4792 } 4793 4794 /* 4795 * Copy xattr from one bucket to another bucket. 
4796 * 4797 * The caller must make sure that the journal transaction 4798 * has enough space for journaling. 4799 */ 4800 static int ocfs2_cp_xattr_bucket(struct inode *inode, 4801 handle_t *handle, 4802 u64 s_blkno, 4803 u64 t_blkno, 4804 int t_is_new) 4805 { 4806 int ret; 4807 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4808 4809 BUG_ON(s_blkno == t_blkno); 4810 4811 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno, 4812 (unsigned long long)t_blkno, 4813 t_is_new); 4814 4815 s_bucket = ocfs2_xattr_bucket_new(inode); 4816 t_bucket = ocfs2_xattr_bucket_new(inode); 4817 if (!s_bucket || !t_bucket) { 4818 ret = -ENOMEM; 4819 mlog_errno(ret); 4820 goto out; 4821 } 4822 4823 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); 4824 if (ret) 4825 goto out; 4826 4827 /* 4828 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4829 * there's no need to read it. 4830 */ 4831 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new); 4832 if (ret) 4833 goto out; 4834 4835 /* 4836 * Hey, if we're overwriting t_bucket, what difference does 4837 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new 4838 * cluster to fill, we came here from 4839 * ocfs2_mv_xattr_buckets(), and it is really new - 4840 * ACCESS_CREATE is required. But we also might have moved data 4841 * out of t_bucket before extending back into it. 4842 * ocfs2_add_new_xattr_bucket() can do this - its call to 4843 * ocfs2_add_new_xattr_cluster() may have created a new extent 4844 * and copied out the end of the old extent. Then it re-extends 4845 * the old extent back to create space for new xattrs. That's 4846 * how we get here, and the bucket isn't really new. 4847 */ 4848 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4849 t_is_new ? 
4850 OCFS2_JOURNAL_ACCESS_CREATE : 4851 OCFS2_JOURNAL_ACCESS_WRITE); 4852 if (ret) 4853 goto out; 4854 4855 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4856 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4857 4858 out: 4859 ocfs2_xattr_bucket_free(t_bucket); 4860 ocfs2_xattr_bucket_free(s_bucket); 4861 4862 return ret; 4863 } 4864 4865 /* 4866 * src_blk points to the start of an existing extent. last_blk points to 4867 * last cluster in that extent. to_blk points to a newly allocated 4868 * extent. We copy the buckets from the cluster at last_blk to the new 4869 * extent. If start_bucket is non-zero, we skip that many buckets before 4870 * we start copying. The new extent's xh_num_buckets gets set to the 4871 * number of buckets we copied. The old extent's xh_num_buckets shrinks 4872 * by the same amount. 4873 */ 4874 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 4875 u64 src_blk, u64 last_blk, u64 to_blk, 4876 unsigned int start_bucket, 4877 u32 *first_hash) 4878 { 4879 int i, ret, credits; 4880 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4881 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4882 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4883 struct ocfs2_xattr_bucket *old_first, *new_first; 4884 4885 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk, 4886 (unsigned long long)to_blk); 4887 4888 BUG_ON(start_bucket >= num_buckets); 4889 if (start_bucket) { 4890 num_buckets -= start_bucket; 4891 last_blk += (start_bucket * blks_per_bucket); 4892 } 4893 4894 /* The first bucket of the original extent */ 4895 old_first = ocfs2_xattr_bucket_new(inode); 4896 /* The first bucket of the new extent */ 4897 new_first = ocfs2_xattr_bucket_new(inode); 4898 if (!old_first || !new_first) { 4899 ret = -ENOMEM; 4900 mlog_errno(ret); 4901 goto out; 4902 } 4903 4904 ret = ocfs2_read_xattr_bucket(old_first, src_blk); 4905 if (ret) { 4906 mlog_errno(ret); 4907 goto out; 4908 } 4909 4910 /* 4911 * We 
need to update the first bucket of the old extent and all 4912 * the buckets going to the new extent. 4913 */ 4914 credits = ((num_buckets + 1) * blks_per_bucket); 4915 ret = ocfs2_extend_trans(handle, credits); 4916 if (ret) { 4917 mlog_errno(ret); 4918 goto out; 4919 } 4920 4921 ret = ocfs2_xattr_bucket_journal_access(handle, old_first, 4922 OCFS2_JOURNAL_ACCESS_WRITE); 4923 if (ret) { 4924 mlog_errno(ret); 4925 goto out; 4926 } 4927 4928 for (i = 0; i < num_buckets; i++) { 4929 ret = ocfs2_cp_xattr_bucket(inode, handle, 4930 last_blk + (i * blks_per_bucket), 4931 to_blk + (i * blks_per_bucket), 4932 1); 4933 if (ret) { 4934 mlog_errno(ret); 4935 goto out; 4936 } 4937 } 4938 4939 /* 4940 * Get the new bucket ready before we dirty anything 4941 * (This actually shouldn't fail, because we already dirtied 4942 * it once in ocfs2_cp_xattr_bucket()). 4943 */ 4944 ret = ocfs2_read_xattr_bucket(new_first, to_blk); 4945 if (ret) { 4946 mlog_errno(ret); 4947 goto out; 4948 } 4949 ret = ocfs2_xattr_bucket_journal_access(handle, new_first, 4950 OCFS2_JOURNAL_ACCESS_WRITE); 4951 if (ret) { 4952 mlog_errno(ret); 4953 goto out; 4954 } 4955 4956 /* Now update the headers */ 4957 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); 4958 ocfs2_xattr_bucket_journal_dirty(handle, old_first); 4959 4960 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); 4961 ocfs2_xattr_bucket_journal_dirty(handle, new_first); 4962 4963 if (first_hash) 4964 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); 4965 4966 out: 4967 ocfs2_xattr_bucket_free(new_first); 4968 ocfs2_xattr_bucket_free(old_first); 4969 return ret; 4970 } 4971 4972 /* 4973 * Move some xattrs in this cluster to the new cluster. 4974 * This function should only be called when bucket size == cluster size. 4975 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 
4976 */ 4977 static int ocfs2_divide_xattr_cluster(struct inode *inode, 4978 handle_t *handle, 4979 u64 prev_blk, 4980 u64 new_blk, 4981 u32 *first_hash) 4982 { 4983 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4984 int ret, credits = 2 * blk_per_bucket; 4985 4986 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4987 4988 ret = ocfs2_extend_trans(handle, credits); 4989 if (ret) { 4990 mlog_errno(ret); 4991 return ret; 4992 } 4993 4994 /* Move half of the xattr in start_blk to the next bucket. */ 4995 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, 4996 new_blk, first_hash, 1); 4997 } 4998 4999 /* 5000 * Move some xattrs from the old cluster to the new one since they are not 5001 * contiguous in ocfs2 xattr tree. 5002 * 5003 * new_blk starts a new separate cluster, and we will move some xattrs from 5004 * prev_blk to it. v_start will be set as the first name hash value in this 5005 * new cluster so that it can be used as e_cpos during tree insertion and 5006 * don't collide with our original b-tree operations. first_bh and header_bh 5007 * will also be updated since they will be used in ocfs2_extend_xattr_bucket 5008 * to extend the insert bucket. 5009 * 5010 * The problem is how much xattr should we move to the new one and when should 5011 * we update first_bh and header_bh? 5012 * 1. If cluster size > bucket size, that means the previous cluster has more 5013 * than 1 bucket, so just move half nums of bucket into the new cluster and 5014 * update the first_bh and header_bh if the insert bucket has been moved 5015 * to the new cluster. 5016 * 2. If cluster_size == bucket_size: 5017 * a) If the previous extent rec has more than one cluster and the insert 5018 * place isn't in the last cluster, copy the entire last cluster to the 5019 * new one. This time, we don't need to upate the first_bh and header_bh 5020 * since they will not be moved into the new cluster. 
5021 * b) Otherwise, move the bottom half of the xattrs in the last cluster into 5022 * the new one. And we set the extend flag to zero if the insert place is 5023 * moved into the new allocated cluster since no extend is needed. 5024 */ 5025 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 5026 handle_t *handle, 5027 struct ocfs2_xattr_bucket *first, 5028 struct ocfs2_xattr_bucket *target, 5029 u64 new_blk, 5030 u32 prev_clusters, 5031 u32 *v_start, 5032 int *extend) 5033 { 5034 int ret; 5035 5036 trace_ocfs2_adjust_xattr_cross_cluster( 5037 (unsigned long long)bucket_blkno(first), 5038 (unsigned long long)new_blk, prev_clusters); 5039 5040 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5041 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5042 handle, 5043 first, target, 5044 new_blk, 5045 prev_clusters, 5046 v_start); 5047 if (ret) 5048 mlog_errno(ret); 5049 } else { 5050 /* The start of the last cluster in the first extent */ 5051 u64 last_blk = bucket_blkno(first) + 5052 ((prev_clusters - 1) * 5053 ocfs2_clusters_to_blocks(inode->i_sb, 1)); 5054 5055 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { 5056 ret = ocfs2_mv_xattr_buckets(inode, handle, 5057 bucket_blkno(first), 5058 last_blk, new_blk, 0, 5059 v_start); 5060 if (ret) 5061 mlog_errno(ret); 5062 } else { 5063 ret = ocfs2_divide_xattr_cluster(inode, handle, 5064 last_blk, new_blk, 5065 v_start); 5066 if (ret) 5067 mlog_errno(ret); 5068 5069 if ((bucket_blkno(target) == last_blk) && extend) 5070 *extend = 0; 5071 } 5072 } 5073 5074 return ret; 5075 } 5076 5077 /* 5078 * Add a new cluster for xattr storage. 5079 * 5080 * If the new cluster is contiguous with the previous one, it will be 5081 * appended to the same extent record, and num_clusters will be updated. 5082 * If not, we will insert a new extent for it and move some xattrs in 5083 * the last cluster into the new allocated one. 
5084 * We also need to limit the maximum size of a btree leaf, otherwise we'll 5085 * lose the benefits of hashing because we'll have to search large leaves. 5086 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, 5087 * if it's bigger). 5088 * 5089 * first_bh is the first block of the previous extent rec and header_bh 5090 * indicates the bucket we will insert the new xattrs. They will be updated 5091 * when the header_bh is moved into the new cluster. 5092 */ 5093 static int ocfs2_add_new_xattr_cluster(struct inode *inode, 5094 struct buffer_head *root_bh, 5095 struct ocfs2_xattr_bucket *first, 5096 struct ocfs2_xattr_bucket *target, 5097 u32 *num_clusters, 5098 u32 prev_cpos, 5099 int *extend, 5100 struct ocfs2_xattr_set_ctxt *ctxt) 5101 { 5102 int ret; 5103 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 5104 u32 prev_clusters = *num_clusters; 5105 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 5106 u64 block; 5107 handle_t *handle = ctxt->handle; 5108 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5109 struct ocfs2_extent_tree et; 5110 5111 trace_ocfs2_add_new_xattr_cluster_begin( 5112 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5113 (unsigned long long)bucket_blkno(first), 5114 prev_cpos, prev_clusters); 5115 5116 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5117 5118 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5119 OCFS2_JOURNAL_ACCESS_WRITE); 5120 if (ret < 0) { 5121 mlog_errno(ret); 5122 goto leave; 5123 } 5124 5125 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1, 5126 clusters_to_add, &bit_off, &num_bits); 5127 if (ret < 0) { 5128 if (ret != -ENOSPC) 5129 mlog_errno(ret); 5130 goto leave; 5131 } 5132 5133 BUG_ON(num_bits > clusters_to_add); 5134 5135 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5136 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits); 5137 5138 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5139 
(prev_clusters + num_bits) << osb->s_clustersize_bits <= 5140 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 5141 /* 5142 * If this cluster is contiguous with the old one and 5143 * adding this new cluster, we don't surpass the limit of 5144 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be 5145 * initialized and used like other buckets in the previous 5146 * cluster. 5147 * So add it as a contiguous one. The caller will handle 5148 * its init process. 5149 */ 5150 v_start = prev_cpos + prev_clusters; 5151 *num_clusters = prev_clusters + num_bits; 5152 } else { 5153 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5154 handle, 5155 first, 5156 target, 5157 block, 5158 prev_clusters, 5159 &v_start, 5160 extend); 5161 if (ret) { 5162 mlog_errno(ret); 5163 goto leave; 5164 } 5165 } 5166 5167 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block, 5168 v_start, num_bits); 5169 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5170 num_bits, 0, ctxt->meta_ac); 5171 if (ret < 0) { 5172 mlog_errno(ret); 5173 goto leave; 5174 } 5175 5176 ocfs2_journal_dirty(handle, root_bh); 5177 5178 leave: 5179 return ret; 5180 } 5181 5182 /* 5183 * We are given an extent. 'first' is the bucket at the very front of 5184 * the extent. The extent has space for an additional bucket past 5185 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number 5186 * of the target bucket. We wish to shift every bucket past the target 5187 * down one, filling in that additional space. When we get back to the 5188 * target, we split the target between itself and the now-empty bucket 5189 * at target+1 (aka, target_blkno + blks_per_bucket). 
5190 */ 5191 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5192 handle_t *handle, 5193 struct ocfs2_xattr_bucket *first, 5194 u64 target_blk, 5195 u32 num_clusters) 5196 { 5197 int ret, credits; 5198 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5199 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5200 u64 end_blk; 5201 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5202 5203 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk, 5204 (unsigned long long)bucket_blkno(first), 5205 num_clusters, new_bucket); 5206 5207 /* The extent must have room for an additional bucket */ 5208 BUG_ON(new_bucket >= 5209 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5210 5211 /* end_blk points to the last existing bucket */ 5212 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5213 5214 /* 5215 * end_blk is the start of the last existing bucket. 5216 * Thus, (end_blk - target_blk) covers the target bucket and 5217 * every bucket after it up to, but not including, the last 5218 * existing bucket. Then we add the last existing bucket, the 5219 * new bucket, and the first bucket (3 * blk_per_bucket). 5220 */ 5221 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5222 ret = ocfs2_extend_trans(handle, credits); 5223 if (ret) { 5224 mlog_errno(ret); 5225 goto out; 5226 } 5227 5228 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5229 OCFS2_JOURNAL_ACCESS_WRITE); 5230 if (ret) { 5231 mlog_errno(ret); 5232 goto out; 5233 } 5234 5235 while (end_blk != target_blk) { 5236 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5237 end_blk + blk_per_bucket, 0); 5238 if (ret) 5239 goto out; 5240 end_blk -= blk_per_bucket; 5241 } 5242 5243 /* Move half of the xattr in target_blkno to the next bucket. 
*/ 5244 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5245 target_blk + blk_per_bucket, NULL, 0); 5246 5247 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5248 ocfs2_xattr_bucket_journal_dirty(handle, first); 5249 5250 out: 5251 return ret; 5252 } 5253 5254 /* 5255 * Add new xattr bucket in an extent record and adjust the buckets 5256 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5257 * bucket we want to insert into. 5258 * 5259 * In the easy case, we will move all the buckets after target down by 5260 * one. Half of target's xattrs will be moved to the next bucket. 5261 * 5262 * If current cluster is full, we'll allocate a new one. This may not 5263 * be contiguous. The underlying calls will make sure that there is 5264 * space for the insert, shifting buckets around if necessary. 5265 * 'target' may be moved by those calls. 5266 */ 5267 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5268 struct buffer_head *xb_bh, 5269 struct ocfs2_xattr_bucket *target, 5270 struct ocfs2_xattr_set_ctxt *ctxt) 5271 { 5272 struct ocfs2_xattr_block *xb = 5273 (struct ocfs2_xattr_block *)xb_bh->b_data; 5274 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5275 struct ocfs2_extent_list *el = &xb_root->xt_list; 5276 u32 name_hash = 5277 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5278 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5279 int ret, num_buckets, extend = 1; 5280 u64 p_blkno; 5281 u32 e_cpos, num_clusters; 5282 /* The bucket at the front of the extent */ 5283 struct ocfs2_xattr_bucket *first; 5284 5285 trace_ocfs2_add_new_xattr_bucket( 5286 (unsigned long long)bucket_blkno(target)); 5287 5288 /* The first bucket of the original extent */ 5289 first = ocfs2_xattr_bucket_new(inode); 5290 if (!first) { 5291 ret = -ENOMEM; 5292 mlog_errno(ret); 5293 goto out; 5294 } 5295 5296 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5297 &num_clusters, el); 5298 if (ret) { 5299 mlog_errno(ret); 
5300 goto out; 5301 } 5302 5303 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5304 if (ret) { 5305 mlog_errno(ret); 5306 goto out; 5307 } 5308 5309 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5310 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5311 /* 5312 * This can move first+target if the target bucket moves 5313 * to the new extent. 5314 */ 5315 ret = ocfs2_add_new_xattr_cluster(inode, 5316 xb_bh, 5317 first, 5318 target, 5319 &num_clusters, 5320 e_cpos, 5321 &extend, 5322 ctxt); 5323 if (ret) { 5324 mlog_errno(ret); 5325 goto out; 5326 } 5327 } 5328 5329 if (extend) { 5330 ret = ocfs2_extend_xattr_bucket(inode, 5331 ctxt->handle, 5332 first, 5333 bucket_blkno(target), 5334 num_clusters); 5335 if (ret) 5336 mlog_errno(ret); 5337 } 5338 5339 out: 5340 ocfs2_xattr_bucket_free(first); 5341 5342 return ret; 5343 } 5344 5345 /* 5346 * Truncate the specified xe_off entry in xattr bucket. 5347 * bucket is indicated by header_bh and len is the new length. 5348 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5349 * 5350 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 5351 */ 5352 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5353 struct ocfs2_xattr_bucket *bucket, 5354 int xe_off, 5355 int len, 5356 struct ocfs2_xattr_set_ctxt *ctxt) 5357 { 5358 int ret, offset; 5359 u64 value_blk; 5360 struct ocfs2_xattr_entry *xe; 5361 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5362 size_t blocksize = inode->i_sb->s_blocksize; 5363 struct ocfs2_xattr_value_buf vb = { 5364 .vb_access = ocfs2_journal_access, 5365 }; 5366 5367 xe = &xh->xh_entries[xe_off]; 5368 5369 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5370 5371 offset = le16_to_cpu(xe->xe_name_offset) + 5372 OCFS2_XATTR_SIZE(xe->xe_name_len); 5373 5374 value_blk = offset / blocksize; 5375 5376 /* We don't allow ocfs2_xattr_value to be stored in different block. 
*/ 5377 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5378 5379 vb.vb_bh = bucket->bu_bhs[value_blk]; 5380 BUG_ON(!vb.vb_bh); 5381 5382 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5383 (vb.vb_bh->b_data + offset % blocksize); 5384 5385 /* 5386 * From here on out we have to dirty the bucket. The generic 5387 * value calls only modify one of the bucket's bhs, but we need 5388 * to send the bucket at once. So if they error, they *could* have 5389 * modified something. We have to assume they did, and dirty 5390 * the whole bucket. This leaves us in a consistent state. 5391 */ 5392 trace_ocfs2_xattr_bucket_value_truncate( 5393 (unsigned long long)bucket_blkno(bucket), xe_off, len); 5394 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5395 if (ret) { 5396 mlog_errno(ret); 5397 goto out; 5398 } 5399 5400 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5401 OCFS2_JOURNAL_ACCESS_WRITE); 5402 if (ret) { 5403 mlog_errno(ret); 5404 goto out; 5405 } 5406 5407 xe->xe_value_size = cpu_to_le64(len); 5408 5409 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5410 5411 out: 5412 return ret; 5413 } 5414 5415 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5416 struct buffer_head *root_bh, 5417 u64 blkno, 5418 u32 cpos, 5419 u32 len, 5420 void *para) 5421 { 5422 int ret; 5423 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5424 struct inode *tl_inode = osb->osb_tl_inode; 5425 handle_t *handle; 5426 struct ocfs2_xattr_block *xb = 5427 (struct ocfs2_xattr_block *)root_bh->b_data; 5428 struct ocfs2_alloc_context *meta_ac = NULL; 5429 struct ocfs2_cached_dealloc_ctxt dealloc; 5430 struct ocfs2_extent_tree et; 5431 5432 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5433 ocfs2_delete_xattr_in_bucket, para); 5434 if (ret) { 5435 mlog_errno(ret); 5436 return ret; 5437 } 5438 5439 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5440 5441 ocfs2_init_dealloc_ctxt(&dealloc); 5442 5443 
trace_ocfs2_rm_xattr_cluster( 5444 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5445 (unsigned long long)blkno, cpos, len); 5446 5447 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5448 len); 5449 5450 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5451 if (ret) { 5452 mlog_errno(ret); 5453 return ret; 5454 } 5455 5456 inode_lock(tl_inode); 5457 5458 if (ocfs2_truncate_log_needs_flush(osb)) { 5459 ret = __ocfs2_flush_truncate_log(osb); 5460 if (ret < 0) { 5461 mlog_errno(ret); 5462 goto out; 5463 } 5464 } 5465 5466 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5467 if (IS_ERR(handle)) { 5468 ret = -ENOMEM; 5469 mlog_errno(ret); 5470 goto out; 5471 } 5472 5473 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5474 OCFS2_JOURNAL_ACCESS_WRITE); 5475 if (ret) { 5476 mlog_errno(ret); 5477 goto out_commit; 5478 } 5479 5480 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5481 &dealloc); 5482 if (ret) { 5483 mlog_errno(ret); 5484 goto out_commit; 5485 } 5486 5487 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5488 ocfs2_journal_dirty(handle, root_bh); 5489 5490 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5491 if (ret) 5492 mlog_errno(ret); 5493 ocfs2_update_inode_fsync_trans(handle, inode, 0); 5494 5495 out_commit: 5496 ocfs2_commit_trans(osb, handle); 5497 out: 5498 ocfs2_schedule_truncate_log_flush(osb, 1); 5499 5500 inode_unlock(tl_inode); 5501 5502 if (meta_ac) 5503 ocfs2_free_alloc_context(meta_ac); 5504 5505 ocfs2_run_deallocs(osb, &dealloc); 5506 5507 return ret; 5508 } 5509 5510 /* 5511 * check whether the xattr bucket is filled up with the same hash value. 5512 * If we want to insert the xattr with the same hash, return -ENOSPC. 5513 * If we want to insert a xattr with different hash value, go ahead 5514 * and ocfs2_divide_xattr_bucket will handle this. 
5515 */ 5516 static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 5517 struct ocfs2_xattr_bucket *bucket, 5518 const char *name) 5519 { 5520 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5521 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5522 5523 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5524 return 0; 5525 5526 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 5527 xh->xh_entries[0].xe_name_hash) { 5528 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5529 "hash = %u\n", 5530 (unsigned long long)bucket_blkno(bucket), 5531 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5532 return -ENOSPC; 5533 } 5534 5535 return 0; 5536 } 5537 5538 /* 5539 * Try to set the entry in the current bucket. If we fail, the caller 5540 * will handle getting us another bucket. 5541 */ 5542 static int ocfs2_xattr_set_entry_bucket(struct inode *inode, 5543 struct ocfs2_xattr_info *xi, 5544 struct ocfs2_xattr_search *xs, 5545 struct ocfs2_xattr_set_ctxt *ctxt) 5546 { 5547 int ret; 5548 struct ocfs2_xa_loc loc; 5549 5550 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name); 5551 5552 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5553 xs->not_found ? NULL : xs->here); 5554 ret = ocfs2_xa_set(&loc, xi, ctxt); 5555 if (!ret) { 5556 xs->here = loc.xl_entry; 5557 goto out; 5558 } 5559 if (ret != -ENOSPC) { 5560 mlog_errno(ret); 5561 goto out; 5562 } 5563 5564 /* Ok, we need space. Let's try defragmenting the bucket. 
*/ 5565 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5566 xs->bucket); 5567 if (ret) { 5568 mlog_errno(ret); 5569 goto out; 5570 } 5571 5572 ret = ocfs2_xa_set(&loc, xi, ctxt); 5573 if (!ret) { 5574 xs->here = loc.xl_entry; 5575 goto out; 5576 } 5577 if (ret != -ENOSPC) 5578 mlog_errno(ret); 5579 5580 5581 out: 5582 return ret; 5583 } 5584 5585 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5586 struct ocfs2_xattr_info *xi, 5587 struct ocfs2_xattr_search *xs, 5588 struct ocfs2_xattr_set_ctxt *ctxt) 5589 { 5590 int ret; 5591 5592 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name); 5593 5594 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5595 if (!ret) 5596 goto out; 5597 if (ret != -ENOSPC) { 5598 mlog_errno(ret); 5599 goto out; 5600 } 5601 5602 /* Ack, need more space. Let's try to get another bucket! */ 5603 5604 /* 5605 * We do not allow for overlapping ranges between buckets. And 5606 * the maximum number of collisions we will allow for then is 5607 * one bucket's worth, so check it here whether we need to 5608 * add a new bucket for the insert. 5609 */ 5610 ret = ocfs2_check_xattr_bucket_collision(inode, 5611 xs->bucket, 5612 xi->xi_name); 5613 if (ret) { 5614 mlog_errno(ret); 5615 goto out; 5616 } 5617 5618 ret = ocfs2_add_new_xattr_bucket(inode, 5619 xs->xattr_bh, 5620 xs->bucket, 5621 ctxt); 5622 if (ret) { 5623 mlog_errno(ret); 5624 goto out; 5625 } 5626 5627 /* 5628 * ocfs2_add_new_xattr_bucket() will have updated 5629 * xs->bucket if it moved, but it will not have updated 5630 * any of the other search fields. Thus, we drop it and 5631 * re-search. Everything should be cached, so it'll be 5632 * quick. 
5633 */ 5634 ocfs2_xattr_bucket_relse(xs->bucket); 5635 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5636 xi->xi_name_index, 5637 xi->xi_name, xs); 5638 if (ret && ret != -ENODATA) 5639 goto out; 5640 xs->not_found = ret; 5641 5642 /* Ok, we have a new bucket, let's try again */ 5643 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5644 if (ret && (ret != -ENOSPC)) 5645 mlog_errno(ret); 5646 5647 out: 5648 return ret; 5649 } 5650 5651 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5652 struct ocfs2_xattr_bucket *bucket, 5653 void *para) 5654 { 5655 int ret = 0, ref_credits; 5656 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5657 u16 i; 5658 struct ocfs2_xattr_entry *xe; 5659 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5660 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5661 int credits = ocfs2_remove_extent_credits(osb->sb) + 5662 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5663 struct ocfs2_xattr_value_root *xv; 5664 struct ocfs2_rm_xattr_bucket_para *args = 5665 (struct ocfs2_rm_xattr_bucket_para *)para; 5666 5667 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5668 5669 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5670 xe = &xh->xh_entries[i]; 5671 if (ocfs2_xattr_is_local(xe)) 5672 continue; 5673 5674 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5675 i, &xv, NULL); 5676 if (ret) { 5677 mlog_errno(ret); 5678 break; 5679 } 5680 5681 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5682 args->ref_ci, 5683 args->ref_root_bh, 5684 &ctxt.meta_ac, 5685 &ref_credits); 5686 5687 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5688 if (IS_ERR(ctxt.handle)) { 5689 ret = PTR_ERR(ctxt.handle); 5690 mlog_errno(ret); 5691 break; 5692 } 5693 5694 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5695 i, 0, &ctxt); 5696 5697 ocfs2_commit_trans(osb, ctxt.handle); 5698 if (ctxt.meta_ac) { 5699 ocfs2_free_alloc_context(ctxt.meta_ac); 5700 ctxt.meta_ac = NULL; 5701 } 5702 if (ret) { 5703 mlog_errno(ret); 
5704 break; 5705 } 5706 } 5707 5708 if (ctxt.meta_ac) 5709 ocfs2_free_alloc_context(ctxt.meta_ac); 5710 ocfs2_schedule_truncate_log_flush(osb, 1); 5711 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5712 return ret; 5713 } 5714 5715 /* 5716 * Whenever we modify a xattr value root in the bucket(e.g, CoW 5717 * or change the extent record flag), we need to recalculate 5718 * the metaecc for the whole bucket. So it is done here. 5719 * 5720 * Note: 5721 * We have to give the extra credits for the caller. 5722 */ 5723 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, 5724 handle_t *handle, 5725 void *para) 5726 { 5727 int ret; 5728 struct ocfs2_xattr_bucket *bucket = 5729 (struct ocfs2_xattr_bucket *)para; 5730 5731 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 5732 OCFS2_JOURNAL_ACCESS_WRITE); 5733 if (ret) { 5734 mlog_errno(ret); 5735 return ret; 5736 } 5737 5738 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 5739 5740 return 0; 5741 } 5742 5743 /* 5744 * Special action we need if the xattr value is refcounted. 5745 * 5746 * 1. If the xattr is refcounted, lock the tree. 5747 * 2. CoW the xattr if we are setting the new value and the value 5748 * will be stored outside. 5749 * 3. In other case, decrease_refcount will work for us, so just 5750 * lock the refcount tree, calculate the meta and credits is OK. 5751 * 5752 * We have to do CoW before ocfs2_init_xattr_set_ctxt since 5753 * currently CoW is a completed transaction, while this function 5754 * will also lock the allocators and let us deadlock. So we will 5755 * CoW the whole xattr value. 
5756 */ 5757 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5758 struct ocfs2_dinode *di, 5759 struct ocfs2_xattr_info *xi, 5760 struct ocfs2_xattr_search *xis, 5761 struct ocfs2_xattr_search *xbs, 5762 struct ocfs2_refcount_tree **ref_tree, 5763 int *meta_add, 5764 int *credits) 5765 { 5766 int ret = 0; 5767 struct ocfs2_xattr_block *xb; 5768 struct ocfs2_xattr_entry *xe; 5769 char *base; 5770 u32 p_cluster, num_clusters; 5771 unsigned int ext_flags; 5772 int name_offset, name_len; 5773 struct ocfs2_xattr_value_buf vb; 5774 struct ocfs2_xattr_bucket *bucket = NULL; 5775 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5776 struct ocfs2_post_refcount refcount; 5777 struct ocfs2_post_refcount *p = NULL; 5778 struct buffer_head *ref_root_bh = NULL; 5779 5780 if (!xis->not_found) { 5781 xe = xis->here; 5782 name_offset = le16_to_cpu(xe->xe_name_offset); 5783 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5784 base = xis->base; 5785 vb.vb_bh = xis->inode_bh; 5786 vb.vb_access = ocfs2_journal_access_di; 5787 } else { 5788 int i, block_off = 0; 5789 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5790 xe = xbs->here; 5791 name_offset = le16_to_cpu(xe->xe_name_offset); 5792 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5793 i = xbs->here - xbs->header->xh_entries; 5794 5795 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5796 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5797 bucket_xh(xbs->bucket), 5798 i, &block_off, 5799 &name_offset); 5800 if (ret) { 5801 mlog_errno(ret); 5802 goto out; 5803 } 5804 base = bucket_block(xbs->bucket, block_off); 5805 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5806 vb.vb_access = ocfs2_journal_access; 5807 5808 if (ocfs2_meta_ecc(osb)) { 5809 /*create parameters for ocfs2_post_refcount. 
*/ 5810 bucket = xbs->bucket; 5811 refcount.credits = bucket->bu_blocks; 5812 refcount.para = bucket; 5813 refcount.func = 5814 ocfs2_xattr_bucket_post_refcount; 5815 p = &refcount; 5816 } 5817 } else { 5818 base = xbs->base; 5819 vb.vb_bh = xbs->xattr_bh; 5820 vb.vb_access = ocfs2_journal_access_xb; 5821 } 5822 } 5823 5824 if (ocfs2_xattr_is_local(xe)) 5825 goto out; 5826 5827 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5828 (base + name_offset + name_len); 5829 5830 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5831 &num_clusters, &vb.vb_xv->xr_list, 5832 &ext_flags); 5833 if (ret) { 5834 mlog_errno(ret); 5835 goto out; 5836 } 5837 5838 /* 5839 * We just need to check the 1st extent record, since we always 5840 * CoW the whole xattr. So there shouldn't be a xattr with 5841 * some REFCOUNT extent recs after the 1st one. 5842 */ 5843 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5844 goto out; 5845 5846 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5847 1, ref_tree, &ref_root_bh); 5848 if (ret) { 5849 mlog_errno(ret); 5850 goto out; 5851 } 5852 5853 /* 5854 * If we are deleting the xattr or the new size will be stored inside, 5855 * cool, leave it there, the xattr truncate process will remove them 5856 * for us(it still needs the refcount tree lock and the meta, credits). 5857 * And the worse case is that every cluster truncate will split the 5858 * refcount tree, and make the original extent become 3. So we will need 5859 * 2 * cluster more extent recs at most. 
5860 */ 5861 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5862 5863 ret = ocfs2_refcounted_xattr_delete_need(inode, 5864 &(*ref_tree)->rf_ci, 5865 ref_root_bh, vb.vb_xv, 5866 meta_add, credits); 5867 if (ret) 5868 mlog_errno(ret); 5869 goto out; 5870 } 5871 5872 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5873 *ref_tree, ref_root_bh, 0, 5874 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5875 if (ret) 5876 mlog_errno(ret); 5877 5878 out: 5879 brelse(ref_root_bh); 5880 return ret; 5881 } 5882 5883 /* 5884 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5885 * The physical clusters will be added to refcount tree. 5886 */ 5887 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5888 struct ocfs2_xattr_value_root *xv, 5889 struct ocfs2_extent_tree *value_et, 5890 struct ocfs2_caching_info *ref_ci, 5891 struct buffer_head *ref_root_bh, 5892 struct ocfs2_cached_dealloc_ctxt *dealloc, 5893 struct ocfs2_post_refcount *refcount) 5894 { 5895 int ret = 0; 5896 u32 clusters = le32_to_cpu(xv->xr_clusters); 5897 u32 cpos, p_cluster, num_clusters; 5898 struct ocfs2_extent_list *el = &xv->xr_list; 5899 unsigned int ext_flags; 5900 5901 cpos = 0; 5902 while (cpos < clusters) { 5903 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5904 &num_clusters, el, &ext_flags); 5905 if (ret) { 5906 mlog_errno(ret); 5907 break; 5908 } 5909 5910 cpos += num_clusters; 5911 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5912 continue; 5913 5914 BUG_ON(!p_cluster); 5915 5916 ret = ocfs2_add_refcount_flag(inode, value_et, 5917 ref_ci, ref_root_bh, 5918 cpos - num_clusters, 5919 p_cluster, num_clusters, 5920 dealloc, refcount); 5921 if (ret) { 5922 mlog_errno(ret); 5923 break; 5924 } 5925 } 5926 5927 return ret; 5928 } 5929 5930 /* 5931 * Given a normal ocfs2_xattr_header, refcount all the entries which 5932 * have value stored outside. 5933 * Used for xattrs stored in inode and ocfs2_xattr_block. 
5934 */ 5935 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5936 struct ocfs2_xattr_value_buf *vb, 5937 struct ocfs2_xattr_header *header, 5938 struct ocfs2_caching_info *ref_ci, 5939 struct buffer_head *ref_root_bh, 5940 struct ocfs2_cached_dealloc_ctxt *dealloc) 5941 { 5942 5943 struct ocfs2_xattr_entry *xe; 5944 struct ocfs2_xattr_value_root *xv; 5945 struct ocfs2_extent_tree et; 5946 int i, ret = 0; 5947 5948 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5949 xe = &header->xh_entries[i]; 5950 5951 if (ocfs2_xattr_is_local(xe)) 5952 continue; 5953 5954 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5955 le16_to_cpu(xe->xe_name_offset) + 5956 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5957 5958 vb->vb_xv = xv; 5959 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5960 5961 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 5962 ref_ci, ref_root_bh, 5963 dealloc, NULL); 5964 if (ret) { 5965 mlog_errno(ret); 5966 break; 5967 } 5968 } 5969 5970 return ret; 5971 } 5972 5973 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 5974 struct buffer_head *fe_bh, 5975 struct ocfs2_caching_info *ref_ci, 5976 struct buffer_head *ref_root_bh, 5977 struct ocfs2_cached_dealloc_ctxt *dealloc) 5978 { 5979 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 5980 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) 5981 (fe_bh->b_data + inode->i_sb->s_blocksize - 5982 le16_to_cpu(di->i_xattr_inline_size)); 5983 struct ocfs2_xattr_value_buf vb = { 5984 .vb_bh = fe_bh, 5985 .vb_access = ocfs2_journal_access_di, 5986 }; 5987 5988 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 5989 ref_ci, ref_root_bh, dealloc); 5990 } 5991 5992 struct ocfs2_xattr_tree_value_refcount_para { 5993 struct ocfs2_caching_info *ref_ci; 5994 struct buffer_head *ref_root_bh; 5995 struct ocfs2_cached_dealloc_ctxt *dealloc; 5996 }; 5997 5998 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 
5999 struct ocfs2_xattr_bucket *bucket, 6000 int offset, 6001 struct ocfs2_xattr_value_root **xv, 6002 struct buffer_head **bh) 6003 { 6004 int ret, block_off, name_offset; 6005 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 6006 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6007 void *base; 6008 6009 ret = ocfs2_xattr_bucket_get_name_value(sb, 6010 bucket_xh(bucket), 6011 offset, 6012 &block_off, 6013 &name_offset); 6014 if (ret) { 6015 mlog_errno(ret); 6016 goto out; 6017 } 6018 6019 base = bucket_block(bucket, block_off); 6020 6021 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6022 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6023 6024 if (bh) 6025 *bh = bucket->bu_bhs[block_off]; 6026 out: 6027 return ret; 6028 } 6029 6030 /* 6031 * For a given xattr bucket, refcount all the entries which 6032 * have value stored outside. 6033 */ 6034 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6035 struct ocfs2_xattr_bucket *bucket, 6036 void *para) 6037 { 6038 int i, ret = 0; 6039 struct ocfs2_extent_tree et; 6040 struct ocfs2_xattr_tree_value_refcount_para *ref = 6041 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6042 struct ocfs2_xattr_header *xh = 6043 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6044 struct ocfs2_xattr_entry *xe; 6045 struct ocfs2_xattr_value_buf vb = { 6046 .vb_access = ocfs2_journal_access, 6047 }; 6048 struct ocfs2_post_refcount refcount = { 6049 .credits = bucket->bu_blocks, 6050 .para = bucket, 6051 .func = ocfs2_xattr_bucket_post_refcount, 6052 }; 6053 struct ocfs2_post_refcount *p = NULL; 6054 6055 /* We only need post_refcount if we support metaecc. 
*/ 6056 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6057 p = &refcount; 6058 6059 trace_ocfs2_xattr_bucket_value_refcount( 6060 (unsigned long long)bucket_blkno(bucket), 6061 le16_to_cpu(xh->xh_count)); 6062 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6063 xe = &xh->xh_entries[i]; 6064 6065 if (ocfs2_xattr_is_local(xe)) 6066 continue; 6067 6068 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6069 &vb.vb_xv, &vb.vb_bh); 6070 if (ret) { 6071 mlog_errno(ret); 6072 break; 6073 } 6074 6075 ocfs2_init_xattr_value_extent_tree(&et, 6076 INODE_CACHE(inode), &vb); 6077 6078 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6079 &et, ref->ref_ci, 6080 ref->ref_root_bh, 6081 ref->dealloc, p); 6082 if (ret) { 6083 mlog_errno(ret); 6084 break; 6085 } 6086 } 6087 6088 return ret; 6089 6090 } 6091 6092 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6093 struct buffer_head *root_bh, 6094 u64 blkno, u32 cpos, u32 len, void *para) 6095 { 6096 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6097 ocfs2_xattr_bucket_value_refcount, 6098 para); 6099 } 6100 6101 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6102 struct buffer_head *blk_bh, 6103 struct ocfs2_caching_info *ref_ci, 6104 struct buffer_head *ref_root_bh, 6105 struct ocfs2_cached_dealloc_ctxt *dealloc) 6106 { 6107 int ret = 0; 6108 struct ocfs2_xattr_block *xb = 6109 (struct ocfs2_xattr_block *)blk_bh->b_data; 6110 6111 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6112 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6113 struct ocfs2_xattr_value_buf vb = { 6114 .vb_bh = blk_bh, 6115 .vb_access = ocfs2_journal_access_xb, 6116 }; 6117 6118 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6119 ref_ci, ref_root_bh, 6120 dealloc); 6121 } else { 6122 struct ocfs2_xattr_tree_value_refcount_para para = { 6123 .ref_ci = ref_ci, 6124 .ref_root_bh = ref_root_bh, 6125 .dealloc = dealloc, 6126 }; 6127 6128 ret = 
ocfs2_iterate_xattr_index_block(inode, blk_bh, 6129 ocfs2_refcount_xattr_tree_rec, 6130 ¶); 6131 } 6132 6133 return ret; 6134 } 6135 6136 int ocfs2_xattr_attach_refcount_tree(struct inode *inode, 6137 struct buffer_head *fe_bh, 6138 struct ocfs2_caching_info *ref_ci, 6139 struct buffer_head *ref_root_bh, 6140 struct ocfs2_cached_dealloc_ctxt *dealloc) 6141 { 6142 int ret = 0; 6143 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6144 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6145 struct buffer_head *blk_bh = NULL; 6146 6147 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 6148 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, 6149 ref_ci, ref_root_bh, 6150 dealloc); 6151 if (ret) { 6152 mlog_errno(ret); 6153 goto out; 6154 } 6155 } 6156 6157 if (!di->i_xattr_loc) 6158 goto out; 6159 6160 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 6161 &blk_bh); 6162 if (ret < 0) { 6163 mlog_errno(ret); 6164 goto out; 6165 } 6166 6167 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, 6168 ref_root_bh, dealloc); 6169 if (ret) 6170 mlog_errno(ret); 6171 6172 brelse(blk_bh); 6173 out: 6174 6175 return ret; 6176 } 6177 6178 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); 6179 /* 6180 * Store the information we need in xattr reflink. 6181 * old_bh and new_bh are inode bh for the old and new inode. 6182 */ 6183 struct ocfs2_xattr_reflink { 6184 struct inode *old_inode; 6185 struct inode *new_inode; 6186 struct buffer_head *old_bh; 6187 struct buffer_head *new_bh; 6188 struct ocfs2_caching_info *ref_ci; 6189 struct buffer_head *ref_root_bh; 6190 struct ocfs2_cached_dealloc_ctxt *dealloc; 6191 should_xattr_reflinked *xattr_reflinked; 6192 }; 6193 6194 /* 6195 * Given a xattr header and xe offset, 6196 * return the proper xv and the corresponding bh. 6197 * xattr in inode, block and xattr tree have different implementaions. 
6198 */ 6199 typedef int (get_xattr_value_root)(struct super_block *sb, 6200 struct buffer_head *bh, 6201 struct ocfs2_xattr_header *xh, 6202 int offset, 6203 struct ocfs2_xattr_value_root **xv, 6204 struct buffer_head **ret_bh, 6205 void *para); 6206 6207 /* 6208 * Calculate all the xattr value root metadata stored in this xattr header and 6209 * credits we need if we create them from the scratch. 6210 * We use get_xattr_value_root so that all types of xattr container can use it. 6211 */ 6212 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, 6213 struct buffer_head *bh, 6214 struct ocfs2_xattr_header *xh, 6215 int *metas, int *credits, 6216 int *num_recs, 6217 get_xattr_value_root *func, 6218 void *para) 6219 { 6220 int i, ret = 0; 6221 struct ocfs2_xattr_value_root *xv; 6222 struct ocfs2_xattr_entry *xe; 6223 6224 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6225 xe = &xh->xh_entries[i]; 6226 if (ocfs2_xattr_is_local(xe)) 6227 continue; 6228 6229 ret = func(sb, bh, xh, i, &xv, NULL, para); 6230 if (ret) { 6231 mlog_errno(ret); 6232 break; 6233 } 6234 6235 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * 6236 le16_to_cpu(xv->xr_list.l_next_free_rec); 6237 6238 *credits += ocfs2_calc_extend_credits(sb, 6239 &def_xv.xv.xr_list); 6240 6241 /* 6242 * If the value is a tree with depth > 1, We don't go deep 6243 * to the extent block, so just calculate a maximum record num. 6244 */ 6245 if (!xv->xr_list.l_tree_depth) 6246 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); 6247 else 6248 *num_recs += ocfs2_clusters_for_bytes(sb, 6249 XATTR_SIZE_MAX); 6250 } 6251 6252 return ret; 6253 } 6254 6255 /* Used by xattr inode and block to return the right xv and buffer_head. 
*/ 6256 static int ocfs2_get_xattr_value_root(struct super_block *sb, 6257 struct buffer_head *bh, 6258 struct ocfs2_xattr_header *xh, 6259 int offset, 6260 struct ocfs2_xattr_value_root **xv, 6261 struct buffer_head **ret_bh, 6262 void *para) 6263 { 6264 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6265 6266 *xv = (struct ocfs2_xattr_value_root *)((void *)xh + 6267 le16_to_cpu(xe->xe_name_offset) + 6268 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6269 6270 if (ret_bh) 6271 *ret_bh = bh; 6272 6273 return 0; 6274 } 6275 6276 /* 6277 * Lock the meta_ac and caculate how much credits we need for reflink xattrs. 6278 * It is only used for inline xattr and xattr block. 6279 */ 6280 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, 6281 struct ocfs2_xattr_header *xh, 6282 struct buffer_head *ref_root_bh, 6283 int *credits, 6284 struct ocfs2_alloc_context **meta_ac) 6285 { 6286 int ret, meta_add = 0, num_recs = 0; 6287 struct ocfs2_refcount_block *rb = 6288 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 6289 6290 *credits = 0; 6291 6292 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh, 6293 &meta_add, credits, &num_recs, 6294 ocfs2_get_xattr_value_root, 6295 NULL); 6296 if (ret) { 6297 mlog_errno(ret); 6298 goto out; 6299 } 6300 6301 /* 6302 * We need to add/modify num_recs in refcount tree, so just calculate 6303 * an approximate number we need for refcount tree change. 6304 * Sometimes we need to split the tree, and after split, half recs 6305 * will be moved to the new block, and a new block can only provide 6306 * half number of recs. So we multiple new blocks by 2. 
6307 */ 6308 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6309 meta_add += num_recs; 6310 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6311 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6312 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6313 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6314 else 6315 *credits += 1; 6316 6317 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); 6318 if (ret) 6319 mlog_errno(ret); 6320 6321 out: 6322 return ret; 6323 } 6324 6325 /* 6326 * Given a xattr header, reflink all the xattrs in this container. 6327 * It can be used for inode, block and bucket. 6328 * 6329 * NOTE: 6330 * Before we call this function, the caller has memcpy the xattr in 6331 * old_xh to the new_xh. 6332 * 6333 * If args.xattr_reflinked is set, call it to decide whether the xe should 6334 * be reflinked or not. If not, remove it from the new xattr header. 6335 */ 6336 static int ocfs2_reflink_xattr_header(handle_t *handle, 6337 struct ocfs2_xattr_reflink *args, 6338 struct buffer_head *old_bh, 6339 struct ocfs2_xattr_header *xh, 6340 struct buffer_head *new_bh, 6341 struct ocfs2_xattr_header *new_xh, 6342 struct ocfs2_xattr_value_buf *vb, 6343 struct ocfs2_alloc_context *meta_ac, 6344 get_xattr_value_root *func, 6345 void *para) 6346 { 6347 int ret = 0, i, j; 6348 struct super_block *sb = args->old_inode->i_sb; 6349 struct buffer_head *value_bh; 6350 struct ocfs2_xattr_entry *xe, *last; 6351 struct ocfs2_xattr_value_root *xv, *new_xv; 6352 struct ocfs2_extent_tree data_et; 6353 u32 clusters, cpos, p_cluster, num_clusters; 6354 unsigned int ext_flags = 0; 6355 6356 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr, 6357 le16_to_cpu(xh->xh_count)); 6358 6359 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; 6360 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6361 xe = &xh->xh_entries[i]; 6362 6363 if (args->xattr_reflinked && 
!args->xattr_reflinked(xe)) { 6364 xe = &new_xh->xh_entries[j]; 6365 6366 le16_add_cpu(&new_xh->xh_count, -1); 6367 if (new_xh->xh_count) { 6368 memmove(xe, xe + 1, 6369 (void *)last - (void *)xe); 6370 memset(last, 0, 6371 sizeof(struct ocfs2_xattr_entry)); 6372 } 6373 6374 /* 6375 * We don't want j to increase in the next round since 6376 * it is already moved ahead. 6377 */ 6378 j--; 6379 continue; 6380 } 6381 6382 if (ocfs2_xattr_is_local(xe)) 6383 continue; 6384 6385 ret = func(sb, old_bh, xh, i, &xv, NULL, para); 6386 if (ret) { 6387 mlog_errno(ret); 6388 break; 6389 } 6390 6391 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); 6392 if (ret) { 6393 mlog_errno(ret); 6394 break; 6395 } 6396 6397 /* 6398 * For the xattr which has l_tree_depth = 0, all the extent 6399 * recs have already be copied to the new xh with the 6400 * propriate OCFS2_EXT_REFCOUNTED flag we just need to 6401 * increase the refount count int the refcount tree. 6402 * 6403 * For the xattr which has l_tree_depth > 0, we need 6404 * to initialize it to the empty default value root, 6405 * and then insert the extents one by one. 
6406 */ 6407 if (xv->xr_list.l_tree_depth) { 6408 memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE); 6409 vb->vb_xv = new_xv; 6410 vb->vb_bh = value_bh; 6411 ocfs2_init_xattr_value_extent_tree(&data_et, 6412 INODE_CACHE(args->new_inode), vb); 6413 } 6414 6415 clusters = le32_to_cpu(xv->xr_clusters); 6416 cpos = 0; 6417 while (cpos < clusters) { 6418 ret = ocfs2_xattr_get_clusters(args->old_inode, 6419 cpos, 6420 &p_cluster, 6421 &num_clusters, 6422 &xv->xr_list, 6423 &ext_flags); 6424 if (ret) { 6425 mlog_errno(ret); 6426 goto out; 6427 } 6428 6429 BUG_ON(!p_cluster); 6430 6431 if (xv->xr_list.l_tree_depth) { 6432 ret = ocfs2_insert_extent(handle, 6433 &data_et, cpos, 6434 ocfs2_clusters_to_blocks( 6435 args->old_inode->i_sb, 6436 p_cluster), 6437 num_clusters, ext_flags, 6438 meta_ac); 6439 if (ret) { 6440 mlog_errno(ret); 6441 goto out; 6442 } 6443 } 6444 6445 ret = ocfs2_increase_refcount(handle, args->ref_ci, 6446 args->ref_root_bh, 6447 p_cluster, num_clusters, 6448 meta_ac, args->dealloc); 6449 if (ret) { 6450 mlog_errno(ret); 6451 goto out; 6452 } 6453 6454 cpos += num_clusters; 6455 } 6456 } 6457 6458 out: 6459 return ret; 6460 } 6461 6462 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) 6463 { 6464 int ret = 0, credits = 0; 6465 handle_t *handle; 6466 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); 6467 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; 6468 int inline_size = le16_to_cpu(di->i_xattr_inline_size); 6469 int header_off = osb->sb->s_blocksize - inline_size; 6470 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *) 6471 (args->old_bh->b_data + header_off); 6472 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *) 6473 (args->new_bh->b_data + header_off); 6474 struct ocfs2_alloc_context *meta_ac = NULL; 6475 struct ocfs2_inode_info *new_oi; 6476 struct ocfs2_dinode *new_di; 6477 struct ocfs2_xattr_value_buf vb = { 6478 .vb_bh = args->new_bh, 6479 .vb_access = 
ocfs2_journal_access_di, 6480 }; 6481 6482 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6483 &credits, &meta_ac); 6484 if (ret) { 6485 mlog_errno(ret); 6486 goto out; 6487 } 6488 6489 handle = ocfs2_start_trans(osb, credits); 6490 if (IS_ERR(handle)) { 6491 ret = PTR_ERR(handle); 6492 mlog_errno(ret); 6493 goto out; 6494 } 6495 6496 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), 6497 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6498 if (ret) { 6499 mlog_errno(ret); 6500 goto out_commit; 6501 } 6502 6503 memcpy(args->new_bh->b_data + header_off, 6504 args->old_bh->b_data + header_off, inline_size); 6505 6506 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6507 new_di->i_xattr_inline_size = cpu_to_le16(inline_size); 6508 6509 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, 6510 args->new_bh, new_xh, &vb, meta_ac, 6511 ocfs2_get_xattr_value_root, NULL); 6512 if (ret) { 6513 mlog_errno(ret); 6514 goto out_commit; 6515 } 6516 6517 new_oi = OCFS2_I(args->new_inode); 6518 /* 6519 * Adjust extent record count to reserve space for extended attribute. 6520 * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). 
6521 */ 6522 if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && 6523 !(ocfs2_inode_is_fast_symlink(args->new_inode))) { 6524 struct ocfs2_extent_list *el = &new_di->id2.i_list; 6525 le16_add_cpu(&el->l_count, -(inline_size / 6526 sizeof(struct ocfs2_extent_rec))); 6527 } 6528 spin_lock(&new_oi->ip_lock); 6529 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; 6530 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6531 spin_unlock(&new_oi->ip_lock); 6532 6533 ocfs2_journal_dirty(handle, args->new_bh); 6534 6535 out_commit: 6536 ocfs2_commit_trans(osb, handle); 6537 6538 out: 6539 if (meta_ac) 6540 ocfs2_free_alloc_context(meta_ac); 6541 return ret; 6542 } 6543 6544 static int ocfs2_create_empty_xattr_block(struct inode *inode, 6545 struct buffer_head *fe_bh, 6546 struct buffer_head **ret_bh, 6547 int indexed) 6548 { 6549 int ret; 6550 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6551 struct ocfs2_xattr_set_ctxt ctxt; 6552 6553 memset(&ctxt, 0, sizeof(ctxt)); 6554 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); 6555 if (ret < 0) { 6556 mlog_errno(ret); 6557 return ret; 6558 } 6559 6560 ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); 6561 if (IS_ERR(ctxt.handle)) { 6562 ret = PTR_ERR(ctxt.handle); 6563 mlog_errno(ret); 6564 goto out; 6565 } 6566 6567 trace_ocfs2_create_empty_xattr_block( 6568 (unsigned long long)fe_bh->b_blocknr, indexed); 6569 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6570 ret_bh); 6571 if (ret) 6572 mlog_errno(ret); 6573 6574 ocfs2_commit_trans(osb, ctxt.handle); 6575 out: 6576 ocfs2_free_alloc_context(ctxt.meta_ac); 6577 return ret; 6578 } 6579 6580 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args, 6581 struct buffer_head *blk_bh, 6582 struct buffer_head *new_blk_bh) 6583 { 6584 int ret = 0, credits = 0; 6585 handle_t *handle; 6586 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode); 6587 struct ocfs2_dinode *new_di; 
6588 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb); 6589 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 6590 struct ocfs2_xattr_block *xb = 6591 (struct ocfs2_xattr_block *)blk_bh->b_data; 6592 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header; 6593 struct ocfs2_xattr_block *new_xb = 6594 (struct ocfs2_xattr_block *)new_blk_bh->b_data; 6595 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header; 6596 struct ocfs2_alloc_context *meta_ac; 6597 struct ocfs2_xattr_value_buf vb = { 6598 .vb_bh = new_blk_bh, 6599 .vb_access = ocfs2_journal_access_xb, 6600 }; 6601 6602 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6603 &credits, &meta_ac); 6604 if (ret) { 6605 mlog_errno(ret); 6606 return ret; 6607 } 6608 6609 /* One more credits in case we need to add xattr flags in new inode. */ 6610 handle = ocfs2_start_trans(osb, credits + 1); 6611 if (IS_ERR(handle)) { 6612 ret = PTR_ERR(handle); 6613 mlog_errno(ret); 6614 goto out; 6615 } 6616 6617 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6618 ret = ocfs2_journal_access_di(handle, 6619 INODE_CACHE(args->new_inode), 6620 args->new_bh, 6621 OCFS2_JOURNAL_ACCESS_WRITE); 6622 if (ret) { 6623 mlog_errno(ret); 6624 goto out_commit; 6625 } 6626 } 6627 6628 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode), 6629 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6630 if (ret) { 6631 mlog_errno(ret); 6632 goto out_commit; 6633 } 6634 6635 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off, 6636 osb->sb->s_blocksize - header_off); 6637 6638 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh, 6639 new_blk_bh, new_xh, &vb, meta_ac, 6640 ocfs2_get_xattr_value_root, NULL); 6641 if (ret) { 6642 mlog_errno(ret); 6643 goto out_commit; 6644 } 6645 6646 ocfs2_journal_dirty(handle, new_blk_bh); 6647 6648 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6649 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6650 
spin_lock(&new_oi->ip_lock); 6651 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 6652 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6653 spin_unlock(&new_oi->ip_lock); 6654 6655 ocfs2_journal_dirty(handle, args->new_bh); 6656 } 6657 6658 out_commit: 6659 ocfs2_commit_trans(osb, handle); 6660 6661 out: 6662 ocfs2_free_alloc_context(meta_ac); 6663 return ret; 6664 } 6665 6666 struct ocfs2_reflink_xattr_tree_args { 6667 struct ocfs2_xattr_reflink *reflink; 6668 struct buffer_head *old_blk_bh; 6669 struct buffer_head *new_blk_bh; 6670 struct ocfs2_xattr_bucket *old_bucket; 6671 struct ocfs2_xattr_bucket *new_bucket; 6672 }; 6673 6674 /* 6675 * NOTE: 6676 * We have to handle the case that both old bucket and new bucket 6677 * will call this function to get the right ret_bh. 6678 * So The caller must give us the right bh. 6679 */ 6680 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb, 6681 struct buffer_head *bh, 6682 struct ocfs2_xattr_header *xh, 6683 int offset, 6684 struct ocfs2_xattr_value_root **xv, 6685 struct buffer_head **ret_bh, 6686 void *para) 6687 { 6688 struct ocfs2_reflink_xattr_tree_args *args = 6689 (struct ocfs2_reflink_xattr_tree_args *)para; 6690 struct ocfs2_xattr_bucket *bucket; 6691 6692 if (bh == args->old_bucket->bu_bhs[0]) 6693 bucket = args->old_bucket; 6694 else 6695 bucket = args->new_bucket; 6696 6697 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6698 xv, ret_bh); 6699 } 6700 6701 struct ocfs2_value_tree_metas { 6702 int num_metas; 6703 int credits; 6704 int num_recs; 6705 }; 6706 6707 static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb, 6708 struct buffer_head *bh, 6709 struct ocfs2_xattr_header *xh, 6710 int offset, 6711 struct ocfs2_xattr_value_root **xv, 6712 struct buffer_head **ret_bh, 6713 void *para) 6714 { 6715 struct ocfs2_xattr_bucket *bucket = 6716 (struct ocfs2_xattr_bucket *)para; 6717 6718 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6719 xv, 
ret_bh); 6720 } 6721 6722 static int ocfs2_calc_value_tree_metas(struct inode *inode, 6723 struct ocfs2_xattr_bucket *bucket, 6724 void *para) 6725 { 6726 struct ocfs2_value_tree_metas *metas = 6727 (struct ocfs2_value_tree_metas *)para; 6728 struct ocfs2_xattr_header *xh = 6729 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6730 6731 /* Add the credits for this bucket first. */ 6732 metas->credits += bucket->bu_blocks; 6733 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0], 6734 xh, &metas->num_metas, 6735 &metas->credits, &metas->num_recs, 6736 ocfs2_value_tree_metas_in_bucket, 6737 bucket); 6738 } 6739 6740 /* 6741 * Given a xattr extent rec starting from blkno and having len clusters, 6742 * iterate all the buckets calculate how much metadata we need for reflinking 6743 * all the ocfs2_xattr_value_root and lock the allocators accordingly. 6744 */ 6745 static int ocfs2_lock_reflink_xattr_rec_allocators( 6746 struct ocfs2_reflink_xattr_tree_args *args, 6747 struct ocfs2_extent_tree *xt_et, 6748 u64 blkno, u32 len, int *credits, 6749 struct ocfs2_alloc_context **meta_ac, 6750 struct ocfs2_alloc_context **data_ac) 6751 { 6752 int ret, num_free_extents; 6753 struct ocfs2_value_tree_metas metas; 6754 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb); 6755 struct ocfs2_refcount_block *rb; 6756 6757 memset(&metas, 0, sizeof(metas)); 6758 6759 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len, 6760 ocfs2_calc_value_tree_metas, &metas); 6761 if (ret) { 6762 mlog_errno(ret); 6763 goto out; 6764 } 6765 6766 *credits = metas.credits; 6767 6768 /* 6769 * Calculate we need for refcount tree change. 6770 * 6771 * We need to add/modify num_recs in refcount tree, so just calculate 6772 * an approximate number we need for refcount tree change. 6773 * Sometimes we need to split the tree, and after split, half recs 6774 * will be moved to the new block, and a new block can only provide 6775 * half number of recs. 
So we multiple new blocks by 2. 6776 * In the end, we have to add credits for modifying the already 6777 * existed refcount block. 6778 */ 6779 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data; 6780 metas.num_recs = 6781 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) / 6782 ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6783 metas.num_metas += metas.num_recs; 6784 *credits += metas.num_recs + 6785 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6786 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6787 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6788 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6789 else 6790 *credits += 1; 6791 6792 /* count in the xattr tree change. */ 6793 num_free_extents = ocfs2_num_free_extents(xt_et); 6794 if (num_free_extents < 0) { 6795 ret = num_free_extents; 6796 mlog_errno(ret); 6797 goto out; 6798 } 6799 6800 if (num_free_extents < len) 6801 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); 6802 6803 *credits += ocfs2_calc_extend_credits(osb->sb, 6804 xt_et->et_root_el); 6805 6806 if (metas.num_metas) { 6807 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, 6808 meta_ac); 6809 if (ret) { 6810 mlog_errno(ret); 6811 goto out; 6812 } 6813 } 6814 6815 if (len) { 6816 ret = ocfs2_reserve_clusters(osb, len, data_ac); 6817 if (ret) 6818 mlog_errno(ret); 6819 } 6820 out: 6821 if (ret) { 6822 if (*meta_ac) { 6823 ocfs2_free_alloc_context(*meta_ac); 6824 *meta_ac = NULL; 6825 } 6826 } 6827 6828 return ret; 6829 } 6830 6831 static int ocfs2_reflink_xattr_bucket(handle_t *handle, 6832 u64 blkno, u64 new_blkno, u32 clusters, 6833 u32 *cpos, int num_buckets, 6834 struct ocfs2_alloc_context *meta_ac, 6835 struct ocfs2_alloc_context *data_ac, 6836 struct ocfs2_reflink_xattr_tree_args *args) 6837 { 6838 int i, j, ret = 0; 6839 struct super_block *sb = args->reflink->old_inode->i_sb; 6840 int bpb = args->old_bucket->bu_blocks; 6841 struct ocfs2_xattr_value_buf vb = { 6842 
.vb_access = ocfs2_journal_access, 6843 }; 6844 6845 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) { 6846 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6847 if (ret) { 6848 mlog_errno(ret); 6849 break; 6850 } 6851 6852 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1); 6853 if (ret) { 6854 mlog_errno(ret); 6855 break; 6856 } 6857 6858 ret = ocfs2_xattr_bucket_journal_access(handle, 6859 args->new_bucket, 6860 OCFS2_JOURNAL_ACCESS_CREATE); 6861 if (ret) { 6862 mlog_errno(ret); 6863 break; 6864 } 6865 6866 for (j = 0; j < bpb; j++) 6867 memcpy(bucket_block(args->new_bucket, j), 6868 bucket_block(args->old_bucket, j), 6869 sb->s_blocksize); 6870 6871 /* 6872 * Record the start cpos so that we can use it to initialize 6873 * our xattr tree we also set the xh_num_bucket for the new 6874 * bucket. 6875 */ 6876 if (i == 0) { 6877 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> 6878 xh_entries[0].xe_name_hash); 6879 bucket_xh(args->new_bucket)->xh_num_buckets = 6880 cpu_to_le16(num_buckets); 6881 } 6882 6883 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6884 6885 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6886 args->old_bucket->bu_bhs[0], 6887 bucket_xh(args->old_bucket), 6888 args->new_bucket->bu_bhs[0], 6889 bucket_xh(args->new_bucket), 6890 &vb, meta_ac, 6891 ocfs2_get_reflink_xattr_value_root, 6892 args); 6893 if (ret) { 6894 mlog_errno(ret); 6895 break; 6896 } 6897 6898 /* 6899 * Re-access and dirty the bucket to calculate metaecc. 6900 * Because we may extend the transaction in reflink_xattr_header 6901 * which will let the already accessed block gone. 
6902 */ 6903 ret = ocfs2_xattr_bucket_journal_access(handle, 6904 args->new_bucket, 6905 OCFS2_JOURNAL_ACCESS_WRITE); 6906 if (ret) { 6907 mlog_errno(ret); 6908 break; 6909 } 6910 6911 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6912 6913 ocfs2_xattr_bucket_relse(args->old_bucket); 6914 ocfs2_xattr_bucket_relse(args->new_bucket); 6915 } 6916 6917 ocfs2_xattr_bucket_relse(args->old_bucket); 6918 ocfs2_xattr_bucket_relse(args->new_bucket); 6919 return ret; 6920 } 6921 6922 static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6923 struct inode *inode, 6924 struct ocfs2_reflink_xattr_tree_args *args, 6925 struct ocfs2_extent_tree *et, 6926 struct ocfs2_alloc_context *meta_ac, 6927 struct ocfs2_alloc_context *data_ac, 6928 u64 blkno, u32 cpos, u32 len) 6929 { 6930 int ret, first_inserted = 0; 6931 u32 p_cluster, num_clusters, reflink_cpos = 0; 6932 u64 new_blkno; 6933 unsigned int num_buckets, reflink_buckets; 6934 unsigned int bpc = 6935 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 6936 6937 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6938 if (ret) { 6939 mlog_errno(ret); 6940 goto out; 6941 } 6942 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); 6943 ocfs2_xattr_bucket_relse(args->old_bucket); 6944 6945 while (len && num_buckets) { 6946 ret = ocfs2_claim_clusters(handle, data_ac, 6947 1, &p_cluster, &num_clusters); 6948 if (ret) { 6949 mlog_errno(ret); 6950 goto out; 6951 } 6952 6953 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 6954 reflink_buckets = min(num_buckets, bpc * num_clusters); 6955 6956 ret = ocfs2_reflink_xattr_bucket(handle, blkno, 6957 new_blkno, num_clusters, 6958 &reflink_cpos, reflink_buckets, 6959 meta_ac, data_ac, args); 6960 if (ret) { 6961 mlog_errno(ret); 6962 goto out; 6963 } 6964 6965 /* 6966 * For the 1st allocated cluster, we make it use the same cpos 6967 * so that the xattr tree looks the same as the original one 6968 * in the most case. 
6969 */ 6970 if (!first_inserted) { 6971 reflink_cpos = cpos; 6972 first_inserted = 1; 6973 } 6974 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 6975 num_clusters, 0, meta_ac); 6976 if (ret) 6977 mlog_errno(ret); 6978 6979 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno, 6980 num_clusters, reflink_cpos); 6981 6982 len -= num_clusters; 6983 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6984 num_buckets -= reflink_buckets; 6985 } 6986 out: 6987 return ret; 6988 } 6989 6990 /* 6991 * Create the same xattr extent record in the new inode's xattr tree. 6992 */ 6993 static int ocfs2_reflink_xattr_rec(struct inode *inode, 6994 struct buffer_head *root_bh, 6995 u64 blkno, 6996 u32 cpos, 6997 u32 len, 6998 void *para) 6999 { 7000 int ret, credits = 0; 7001 handle_t *handle; 7002 struct ocfs2_reflink_xattr_tree_args *args = 7003 (struct ocfs2_reflink_xattr_tree_args *)para; 7004 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7005 struct ocfs2_alloc_context *meta_ac = NULL; 7006 struct ocfs2_alloc_context *data_ac = NULL; 7007 struct ocfs2_extent_tree et; 7008 7009 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len); 7010 7011 ocfs2_init_xattr_tree_extent_tree(&et, 7012 INODE_CACHE(args->reflink->new_inode), 7013 args->new_blk_bh); 7014 7015 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 7016 len, &credits, 7017 &meta_ac, &data_ac); 7018 if (ret) { 7019 mlog_errno(ret); 7020 goto out; 7021 } 7022 7023 handle = ocfs2_start_trans(osb, credits); 7024 if (IS_ERR(handle)) { 7025 ret = PTR_ERR(handle); 7026 mlog_errno(ret); 7027 goto out; 7028 } 7029 7030 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7031 meta_ac, data_ac, 7032 blkno, cpos, len); 7033 if (ret) 7034 mlog_errno(ret); 7035 7036 ocfs2_commit_trans(osb, handle); 7037 7038 out: 7039 if (meta_ac) 7040 ocfs2_free_alloc_context(meta_ac); 7041 if (data_ac) 7042 ocfs2_free_alloc_context(data_ac); 7043 return ret; 7044 } 7045 7046 /* 
7047 * Create reflinked xattr buckets. 7048 * We will add bucket one by one, and refcount all the xattrs in the bucket 7049 * if they are stored outside. 7050 */ 7051 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7052 struct buffer_head *blk_bh, 7053 struct buffer_head *new_blk_bh) 7054 { 7055 int ret; 7056 struct ocfs2_reflink_xattr_tree_args para; 7057 7058 memset(¶, 0, sizeof(para)); 7059 para.reflink = args; 7060 para.old_blk_bh = blk_bh; 7061 para.new_blk_bh = new_blk_bh; 7062 7063 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7064 if (!para.old_bucket) { 7065 mlog_errno(-ENOMEM); 7066 return -ENOMEM; 7067 } 7068 7069 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7070 if (!para.new_bucket) { 7071 ret = -ENOMEM; 7072 mlog_errno(ret); 7073 goto out; 7074 } 7075 7076 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7077 ocfs2_reflink_xattr_rec, 7078 ¶); 7079 if (ret) 7080 mlog_errno(ret); 7081 7082 out: 7083 ocfs2_xattr_bucket_free(para.old_bucket); 7084 ocfs2_xattr_bucket_free(para.new_bucket); 7085 return ret; 7086 } 7087 7088 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7089 struct buffer_head *blk_bh) 7090 { 7091 int ret, indexed = 0; 7092 struct buffer_head *new_blk_bh = NULL; 7093 struct ocfs2_xattr_block *xb = 7094 (struct ocfs2_xattr_block *)blk_bh->b_data; 7095 7096 7097 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7098 indexed = 1; 7099 7100 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7101 &new_blk_bh, indexed); 7102 if (ret) { 7103 mlog_errno(ret); 7104 goto out; 7105 } 7106 7107 if (!indexed) 7108 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7109 else 7110 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7111 if (ret) 7112 mlog_errno(ret); 7113 7114 out: 7115 brelse(new_blk_bh); 7116 return ret; 7117 } 7118 7119 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7120 { 7121 int type = 
ocfs2_xattr_get_type(xe); 7122 7123 return type != OCFS2_XATTR_INDEX_SECURITY && 7124 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && 7125 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; 7126 } 7127 7128 int ocfs2_reflink_xattrs(struct inode *old_inode, 7129 struct buffer_head *old_bh, 7130 struct inode *new_inode, 7131 struct buffer_head *new_bh, 7132 bool preserve_security) 7133 { 7134 int ret; 7135 struct ocfs2_xattr_reflink args; 7136 struct ocfs2_inode_info *oi = OCFS2_I(old_inode); 7137 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; 7138 struct buffer_head *blk_bh = NULL; 7139 struct ocfs2_cached_dealloc_ctxt dealloc; 7140 struct ocfs2_refcount_tree *ref_tree; 7141 struct buffer_head *ref_root_bh = NULL; 7142 7143 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7144 le64_to_cpu(di->i_refcount_loc), 7145 1, &ref_tree, &ref_root_bh); 7146 if (ret) { 7147 mlog_errno(ret); 7148 goto out; 7149 } 7150 7151 ocfs2_init_dealloc_ctxt(&dealloc); 7152 7153 args.old_inode = old_inode; 7154 args.new_inode = new_inode; 7155 args.old_bh = old_bh; 7156 args.new_bh = new_bh; 7157 args.ref_ci = &ref_tree->rf_ci; 7158 args.ref_root_bh = ref_root_bh; 7159 args.dealloc = &dealloc; 7160 if (preserve_security) 7161 args.xattr_reflinked = NULL; 7162 else 7163 args.xattr_reflinked = ocfs2_reflink_xattr_no_security; 7164 7165 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 7166 ret = ocfs2_reflink_xattr_inline(&args); 7167 if (ret) { 7168 mlog_errno(ret); 7169 goto out_unlock; 7170 } 7171 } 7172 7173 if (!di->i_xattr_loc) 7174 goto out_unlock; 7175 7176 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), 7177 &blk_bh); 7178 if (ret < 0) { 7179 mlog_errno(ret); 7180 goto out_unlock; 7181 } 7182 7183 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); 7184 if (ret) 7185 mlog_errno(ret); 7186 7187 brelse(blk_bh); 7188 7189 out_unlock: 7190 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7191 ref_tree, 1); 7192 brelse(ref_root_bh); 7193 7194 
if (ocfs2_dealloc_has_cluster(&dealloc)) { 7195 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); 7196 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); 7197 } 7198 7199 out: 7200 return ret; 7201 } 7202 7203 /* 7204 * Initialize security and acl for a already created inode. 7205 * Used for reflink a non-preserve-security file. 7206 * 7207 * It uses common api like ocfs2_xattr_set, so the caller 7208 * must not hold any lock expect i_rwsem. 7209 */ 7210 int ocfs2_init_security_and_acl(struct inode *dir, 7211 struct inode *inode, 7212 const struct qstr *qstr) 7213 { 7214 int ret = 0; 7215 struct buffer_head *dir_bh = NULL; 7216 7217 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7218 if (ret) { 7219 mlog_errno(ret); 7220 goto leave; 7221 } 7222 7223 ret = ocfs2_inode_lock(dir, &dir_bh, 0); 7224 if (ret) { 7225 mlog_errno(ret); 7226 goto leave; 7227 } 7228 ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); 7229 if (ret) 7230 mlog_errno(ret); 7231 7232 ocfs2_inode_unlock(dir, 0); 7233 brelse(dir_bh); 7234 leave: 7235 return ret; 7236 } 7237 7238 /* 7239 * 'security' attributes support 7240 */ 7241 static int ocfs2_xattr_security_get(const struct xattr_handler *handler, 7242 struct dentry *unused, struct inode *inode, 7243 const char *name, void *buffer, size_t size) 7244 { 7245 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, 7246 name, buffer, size); 7247 } 7248 7249 static int ocfs2_xattr_security_set(const struct xattr_handler *handler, 7250 struct user_namespace *mnt_userns, 7251 struct dentry *unused, struct inode *inode, 7252 const char *name, const void *value, 7253 size_t size, int flags) 7254 { 7255 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7256 name, value, size, flags); 7257 } 7258 7259 static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 7260 void *fs_info) 7261 { 7262 const struct xattr *xattr; 7263 int err = 0; 7264 7265 for (xattr = xattr_array; 
xattr->name != NULL; xattr++) { 7266 err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7267 xattr->name, xattr->value, 7268 xattr->value_len, XATTR_CREATE); 7269 if (err) 7270 break; 7271 } 7272 return err; 7273 } 7274 7275 int ocfs2_init_security_get(struct inode *inode, 7276 struct inode *dir, 7277 const struct qstr *qstr, 7278 struct ocfs2_security_xattr_info *si) 7279 { 7280 /* check whether ocfs2 support feature xattr */ 7281 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7282 return -EOPNOTSUPP; 7283 if (si) 7284 return security_old_inode_init_security(inode, dir, qstr, 7285 &si->name, &si->value, 7286 &si->value_len); 7287 7288 return security_inode_init_security(inode, dir, qstr, 7289 &ocfs2_initxattrs, NULL); 7290 } 7291 7292 int ocfs2_init_security_set(handle_t *handle, 7293 struct inode *inode, 7294 struct buffer_head *di_bh, 7295 struct ocfs2_security_xattr_info *si, 7296 struct ocfs2_alloc_context *xattr_ac, 7297 struct ocfs2_alloc_context *data_ac) 7298 { 7299 return ocfs2_xattr_set_handle(handle, inode, di_bh, 7300 OCFS2_XATTR_INDEX_SECURITY, 7301 si->name, si->value, si->value_len, 0, 7302 xattr_ac, data_ac); 7303 } 7304 7305 const struct xattr_handler ocfs2_xattr_security_handler = { 7306 .prefix = XATTR_SECURITY_PREFIX, 7307 .get = ocfs2_xattr_security_get, 7308 .set = ocfs2_xattr_security_set, 7309 }; 7310 7311 /* 7312 * 'trusted' attributes support 7313 */ 7314 static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, 7315 struct dentry *unused, struct inode *inode, 7316 const char *name, void *buffer, size_t size) 7317 { 7318 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, 7319 name, buffer, size); 7320 } 7321 7322 static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler, 7323 struct user_namespace *mnt_userns, 7324 struct dentry *unused, struct inode *inode, 7325 const char *name, const void *value, 7326 size_t size, int flags) 7327 { 7328 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, 
7329 name, value, size, flags); 7330 } 7331 7332 const struct xattr_handler ocfs2_xattr_trusted_handler = { 7333 .prefix = XATTR_TRUSTED_PREFIX, 7334 .get = ocfs2_xattr_trusted_get, 7335 .set = ocfs2_xattr_trusted_set, 7336 }; 7337 7338 /* 7339 * 'user' attributes support 7340 */ 7341 static int ocfs2_xattr_user_get(const struct xattr_handler *handler, 7342 struct dentry *unused, struct inode *inode, 7343 const char *name, void *buffer, size_t size) 7344 { 7345 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7346 7347 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7348 return -EOPNOTSUPP; 7349 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, 7350 buffer, size); 7351 } 7352 7353 static int ocfs2_xattr_user_set(const struct xattr_handler *handler, 7354 struct user_namespace *mnt_userns, 7355 struct dentry *unused, struct inode *inode, 7356 const char *name, const void *value, 7357 size_t size, int flags) 7358 { 7359 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7360 7361 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7362 return -EOPNOTSUPP; 7363 7364 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, 7365 name, value, size, flags); 7366 } 7367 7368 const struct xattr_handler ocfs2_xattr_user_handler = { 7369 .prefix = XATTR_USER_PREFIX, 7370 .get = ocfs2_xattr_user_get, 7371 .set = ocfs2_xattr_user_set, 7372 }; 7373