1 // SPDX-License-Identifier: GPL-2.0-only 2 /* -*- mode: c; c-basic-offset: 8; -*- 3 * vim: noexpandtab sw=8 ts=8 sts=0: 4 * 5 * xattr.c 6 * 7 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 8 * 9 * CREDITS: 10 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 11 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 12 */ 13 14 #include <linux/capability.h> 15 #include <linux/fs.h> 16 #include <linux/types.h> 17 #include <linux/slab.h> 18 #include <linux/highmem.h> 19 #include <linux/pagemap.h> 20 #include <linux/uio.h> 21 #include <linux/sched.h> 22 #include <linux/splice.h> 23 #include <linux/mount.h> 24 #include <linux/writeback.h> 25 #include <linux/falloc.h> 26 #include <linux/sort.h> 27 #include <linux/init.h> 28 #include <linux/module.h> 29 #include <linux/string.h> 30 #include <linux/security.h> 31 32 #include <cluster/masklog.h> 33 34 #include "ocfs2.h" 35 #include "alloc.h" 36 #include "blockcheck.h" 37 #include "dlmglue.h" 38 #include "file.h" 39 #include "symlink.h" 40 #include "sysfile.h" 41 #include "inode.h" 42 #include "journal.h" 43 #include "ocfs2_fs.h" 44 #include "suballoc.h" 45 #include "uptodate.h" 46 #include "buffer_head_io.h" 47 #include "super.h" 48 #include "xattr.h" 49 #include "refcounttree.h" 50 #include "acl.h" 51 #include "ocfs2_trace.h" 52 53 struct ocfs2_xattr_def_value_root { 54 struct ocfs2_xattr_value_root xv; 55 struct ocfs2_extent_rec er; 56 }; 57 58 struct ocfs2_xattr_bucket { 59 /* The inode these xattrs are associated with */ 60 struct inode *bu_inode; 61 62 /* The actual buffers that make up the bucket */ 63 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 64 65 /* How many blocks make up one bucket for this filesystem */ 66 int bu_blocks; 67 }; 68 69 struct ocfs2_xattr_set_ctxt { 70 handle_t *handle; 71 struct ocfs2_alloc_context *meta_ac; 72 struct ocfs2_alloc_context *data_ac; 73 struct ocfs2_cached_dealloc_ctxt dealloc; 74 int set_abort; 75 }; 76 77 #define 
OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 78 #define OCFS2_XATTR_INLINE_SIZE 80 79 #define OCFS2_XATTR_HEADER_GAP 4 80 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 81 - sizeof(struct ocfs2_xattr_header) \ 82 - OCFS2_XATTR_HEADER_GAP) 83 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 84 - sizeof(struct ocfs2_xattr_block) \ 85 - sizeof(struct ocfs2_xattr_header) \ 86 - OCFS2_XATTR_HEADER_GAP) 87 88 static struct ocfs2_xattr_def_value_root def_xv = { 89 .xv.xr_list.l_count = cpu_to_le16(1), 90 }; 91 92 const struct xattr_handler *ocfs2_xattr_handlers[] = { 93 &ocfs2_xattr_user_handler, 94 &posix_acl_access_xattr_handler, 95 &posix_acl_default_xattr_handler, 96 &ocfs2_xattr_trusted_handler, 97 &ocfs2_xattr_security_handler, 98 NULL 99 }; 100 101 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 102 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 103 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 104 = &posix_acl_access_xattr_handler, 105 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 106 = &posix_acl_default_xattr_handler, 107 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 108 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 109 }; 110 111 struct ocfs2_xattr_info { 112 int xi_name_index; 113 const char *xi_name; 114 int xi_name_len; 115 const void *xi_value; 116 size_t xi_value_len; 117 }; 118 119 struct ocfs2_xattr_search { 120 struct buffer_head *inode_bh; 121 /* 122 * xattr_bh point to the block buffer head which has extended attribute 123 * when extended attribute in inode, xattr_bh is equal to inode_bh. 
124 */ 125 struct buffer_head *xattr_bh; 126 struct ocfs2_xattr_header *header; 127 struct ocfs2_xattr_bucket *bucket; 128 void *base; 129 void *end; 130 struct ocfs2_xattr_entry *here; 131 int not_found; 132 }; 133 134 /* Operations on struct ocfs2_xa_entry */ 135 struct ocfs2_xa_loc; 136 struct ocfs2_xa_loc_operations { 137 /* 138 * Journal functions 139 */ 140 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 141 int type); 142 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 143 144 /* 145 * Return a pointer to the appropriate buffer in loc->xl_storage 146 * at the given offset from loc->xl_header. 147 */ 148 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 149 150 /* Can we reuse the existing entry for the new value? */ 151 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 152 struct ocfs2_xattr_info *xi); 153 154 /* How much space is needed for the new value? */ 155 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 156 struct ocfs2_xattr_info *xi); 157 158 /* 159 * Return the offset of the first name+value pair. This is 160 * the start of our downward-filling free space. 161 */ 162 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 163 164 /* 165 * Remove the name+value at this location. Do whatever is 166 * appropriate with the remaining name+value pairs. 167 */ 168 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 169 170 /* Fill xl_entry with a new entry */ 171 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 172 173 /* Add name+value storage to an entry */ 174 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 175 176 /* 177 * Initialize the value buf's access and bh fields for this entry. 178 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 179 */ 180 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 181 struct ocfs2_xattr_value_buf *vb); 182 }; 183 184 /* 185 * Describes an xattr entry location. 
This is a memory structure 186 * tracking the on-disk structure. 187 */ 188 struct ocfs2_xa_loc { 189 /* This xattr belongs to this inode */ 190 struct inode *xl_inode; 191 192 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 193 struct ocfs2_xattr_header *xl_header; 194 195 /* Bytes from xl_header to the end of the storage */ 196 int xl_size; 197 198 /* 199 * The ocfs2_xattr_entry this location describes. If this is 200 * NULL, this location describes the on-disk structure where it 201 * would have been. 202 */ 203 struct ocfs2_xattr_entry *xl_entry; 204 205 /* 206 * Internal housekeeping 207 */ 208 209 /* Buffer(s) containing this entry */ 210 void *xl_storage; 211 212 /* Operations on the storage backing this location */ 213 const struct ocfs2_xa_loc_operations *xl_ops; 214 }; 215 216 /* 217 * Convenience functions to calculate how much space is needed for a 218 * given name+value pair 219 */ 220 static int namevalue_size(int name_len, uint64_t value_len) 221 { 222 if (value_len > OCFS2_XATTR_INLINE_SIZE) 223 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 224 else 225 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 226 } 227 228 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 229 { 230 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 231 } 232 233 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 234 { 235 u64 value_len = le64_to_cpu(xe->xe_value_size); 236 237 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 238 ocfs2_xattr_is_local(xe)); 239 return namevalue_size(xe->xe_name_len, value_len); 240 } 241 242 243 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 244 struct ocfs2_xattr_header *xh, 245 int index, 246 int *block_off, 247 int *new_offset); 248 249 static int ocfs2_xattr_block_find(struct inode *inode, 250 int name_index, 251 const char *name, 252 struct ocfs2_xattr_search *xs); 253 static int ocfs2_xattr_index_block_find(struct inode *inode, 254 struct 
buffer_head *root_bh, 255 int name_index, 256 const char *name, 257 struct ocfs2_xattr_search *xs); 258 259 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 260 struct buffer_head *blk_bh, 261 char *buffer, 262 size_t buffer_size); 263 264 static int ocfs2_xattr_create_index_block(struct inode *inode, 265 struct ocfs2_xattr_search *xs, 266 struct ocfs2_xattr_set_ctxt *ctxt); 267 268 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 269 struct ocfs2_xattr_info *xi, 270 struct ocfs2_xattr_search *xs, 271 struct ocfs2_xattr_set_ctxt *ctxt); 272 273 typedef int (xattr_tree_rec_func)(struct inode *inode, 274 struct buffer_head *root_bh, 275 u64 blkno, u32 cpos, u32 len, void *para); 276 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 277 struct buffer_head *root_bh, 278 xattr_tree_rec_func *rec_func, 279 void *para); 280 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 281 struct ocfs2_xattr_bucket *bucket, 282 void *para); 283 static int ocfs2_rm_xattr_cluster(struct inode *inode, 284 struct buffer_head *root_bh, 285 u64 blkno, 286 u32 cpos, 287 u32 len, 288 void *para); 289 290 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 291 u64 src_blk, u64 last_blk, u64 to_blk, 292 unsigned int start_bucket, 293 u32 *first_hash); 294 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 295 struct ocfs2_dinode *di, 296 struct ocfs2_xattr_info *xi, 297 struct ocfs2_xattr_search *xis, 298 struct ocfs2_xattr_search *xbs, 299 struct ocfs2_refcount_tree **ref_tree, 300 int *meta_need, 301 int *credits); 302 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 303 struct ocfs2_xattr_bucket *bucket, 304 int offset, 305 struct ocfs2_xattr_value_root **xv, 306 struct buffer_head **bh); 307 308 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 309 { 310 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 311 } 312 313 static inline u16 
ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 314 { 315 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 316 } 317 318 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 319 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 320 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 321 322 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 323 { 324 struct ocfs2_xattr_bucket *bucket; 325 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 326 327 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 328 329 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); 330 if (bucket) { 331 bucket->bu_inode = inode; 332 bucket->bu_blocks = blks; 333 } 334 335 return bucket; 336 } 337 338 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 339 { 340 int i; 341 342 for (i = 0; i < bucket->bu_blocks; i++) { 343 brelse(bucket->bu_bhs[i]); 344 bucket->bu_bhs[i] = NULL; 345 } 346 } 347 348 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 349 { 350 if (bucket) { 351 ocfs2_xattr_bucket_relse(bucket); 352 bucket->bu_inode = NULL; 353 kfree(bucket); 354 } 355 } 356 357 /* 358 * A bucket that has never been written to disk doesn't need to be 359 * read. We just need the buffer_heads. Don't call this for 360 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 361 * them fully. 
362 */ 363 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 364 u64 xb_blkno, int new) 365 { 366 int i, rc = 0; 367 368 for (i = 0; i < bucket->bu_blocks; i++) { 369 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 370 xb_blkno + i); 371 if (!bucket->bu_bhs[i]) { 372 rc = -ENOMEM; 373 mlog_errno(rc); 374 break; 375 } 376 377 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 378 bucket->bu_bhs[i])) { 379 if (new) 380 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 381 bucket->bu_bhs[i]); 382 else { 383 set_buffer_uptodate(bucket->bu_bhs[i]); 384 ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 385 bucket->bu_bhs[i]); 386 } 387 } 388 } 389 390 if (rc) 391 ocfs2_xattr_bucket_relse(bucket); 392 return rc; 393 } 394 395 /* Read the xattr bucket at xb_blkno */ 396 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 397 u64 xb_blkno) 398 { 399 int rc; 400 401 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 402 bucket->bu_blocks, bucket->bu_bhs, 0, 403 NULL); 404 if (!rc) { 405 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 406 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 407 bucket->bu_bhs, 408 bucket->bu_blocks, 409 &bucket_xh(bucket)->xh_check); 410 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 411 if (rc) 412 mlog_errno(rc); 413 } 414 415 if (rc) 416 ocfs2_xattr_bucket_relse(bucket); 417 return rc; 418 } 419 420 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 421 struct ocfs2_xattr_bucket *bucket, 422 int type) 423 { 424 int i, rc = 0; 425 426 for (i = 0; i < bucket->bu_blocks; i++) { 427 rc = ocfs2_journal_access(handle, 428 INODE_CACHE(bucket->bu_inode), 429 bucket->bu_bhs[i], type); 430 if (rc) { 431 mlog_errno(rc); 432 break; 433 } 434 } 435 436 return rc; 437 } 438 439 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 440 struct ocfs2_xattr_bucket *bucket) 441 { 442 int i; 443 444 
spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 445 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 446 bucket->bu_bhs, bucket->bu_blocks, 447 &bucket_xh(bucket)->xh_check); 448 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 449 450 for (i = 0; i < bucket->bu_blocks; i++) 451 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 452 } 453 454 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 455 struct ocfs2_xattr_bucket *src) 456 { 457 int i; 458 int blocksize = src->bu_inode->i_sb->s_blocksize; 459 460 BUG_ON(dest->bu_blocks != src->bu_blocks); 461 BUG_ON(dest->bu_inode != src->bu_inode); 462 463 for (i = 0; i < src->bu_blocks; i++) { 464 memcpy(bucket_block(dest, i), bucket_block(src, i), 465 blocksize); 466 } 467 } 468 469 static int ocfs2_validate_xattr_block(struct super_block *sb, 470 struct buffer_head *bh) 471 { 472 int rc; 473 struct ocfs2_xattr_block *xb = 474 (struct ocfs2_xattr_block *)bh->b_data; 475 476 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); 477 478 BUG_ON(!buffer_uptodate(bh)); 479 480 /* 481 * If the ecc fails, we return the error but otherwise 482 * leave the filesystem running. We know any error is 483 * local to this block. 
484 */ 485 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 486 if (rc) 487 return rc; 488 489 /* 490 * Errors after here are fatal 491 */ 492 493 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 494 return ocfs2_error(sb, 495 "Extended attribute block #%llu has bad signature %.*s\n", 496 (unsigned long long)bh->b_blocknr, 7, 497 xb->xb_signature); 498 } 499 500 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 501 return ocfs2_error(sb, 502 "Extended attribute block #%llu has an invalid xb_blkno of %llu\n", 503 (unsigned long long)bh->b_blocknr, 504 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 505 } 506 507 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 508 return ocfs2_error(sb, 509 "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n", 510 (unsigned long long)bh->b_blocknr, 511 le32_to_cpu(xb->xb_fs_generation)); 512 } 513 514 return 0; 515 } 516 517 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 518 struct buffer_head **bh) 519 { 520 int rc; 521 struct buffer_head *tmp = *bh; 522 523 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 524 ocfs2_validate_xattr_block); 525 526 /* If ocfs2_read_block() got us a new bh, pass it up. */ 527 if (!rc && !*bh) 528 *bh = tmp; 529 530 return rc; 531 } 532 533 static inline const char *ocfs2_xattr_prefix(int name_index) 534 { 535 const struct xattr_handler *handler = NULL; 536 537 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 538 handler = ocfs2_xattr_handler_map[name_index]; 539 return handler ? 
xattr_prefix(handler) : NULL; 540 } 541 542 static u32 ocfs2_xattr_name_hash(struct inode *inode, 543 const char *name, 544 int name_len) 545 { 546 /* Get hash value of uuid from super block */ 547 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 548 int i; 549 550 /* hash extended attribute name */ 551 for (i = 0; i < name_len; i++) { 552 hash = (hash << OCFS2_HASH_SHIFT) ^ 553 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 554 *name++; 555 } 556 557 return hash; 558 } 559 560 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 561 { 562 return namevalue_size(name_len, value_len) + 563 sizeof(struct ocfs2_xattr_entry); 564 } 565 566 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 567 { 568 return namevalue_size_xi(xi) + 569 sizeof(struct ocfs2_xattr_entry); 570 } 571 572 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 573 { 574 return namevalue_size_xe(xe) + 575 sizeof(struct ocfs2_xattr_entry); 576 } 577 578 int ocfs2_calc_security_init(struct inode *dir, 579 struct ocfs2_security_xattr_info *si, 580 int *want_clusters, 581 int *xattr_credits, 582 struct ocfs2_alloc_context **xattr_ac) 583 { 584 int ret = 0; 585 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 586 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 587 si->value_len); 588 589 /* 590 * The max space of security xattr taken inline is 591 * 256(name) + 80(value) + 16(entry) = 352 bytes, 592 * So reserve one metadata block for it is ok. 
593 */ 594 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 595 s_size > OCFS2_XATTR_FREE_IN_IBODY) { 596 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 597 if (ret) { 598 mlog_errno(ret); 599 return ret; 600 } 601 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 602 } 603 604 /* reserve clusters for xattr value which will be set in B tree*/ 605 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { 606 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 607 si->value_len); 608 609 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 610 new_clusters); 611 *want_clusters += new_clusters; 612 } 613 return ret; 614 } 615 616 int ocfs2_calc_xattr_init(struct inode *dir, 617 struct buffer_head *dir_bh, 618 umode_t mode, 619 struct ocfs2_security_xattr_info *si, 620 int *want_clusters, 621 int *xattr_credits, 622 int *want_meta) 623 { 624 int ret = 0; 625 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 626 int s_size = 0, a_size = 0, acl_len = 0, new_clusters; 627 628 if (si->enable) 629 s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 630 si->value_len); 631 632 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 633 down_read(&OCFS2_I(dir)->ip_xattr_sem); 634 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 635 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 636 "", NULL, 0); 637 up_read(&OCFS2_I(dir)->ip_xattr_sem); 638 if (acl_len > 0) { 639 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 640 if (S_ISDIR(mode)) 641 a_size <<= 1; 642 } else if (acl_len != 0 && acl_len != -ENODATA) { 643 ret = acl_len; 644 mlog_errno(ret); 645 return ret; 646 } 647 } 648 649 if (!(s_size + a_size)) 650 return ret; 651 652 /* 653 * The max space of security xattr taken inline is 654 * 256(name) + 80(value) + 16(entry) = 352 bytes, 655 * The max space of acl xattr taken inline is 656 * 80(value) + 16(entry) * 2(if directory) = 192 bytes, 657 * when blocksize = 512, may reserve one more cluser for 658 * xattr bucket, otherwise reserve one metadata block 659 * for them is ok. 
660 * If this is a new directory with inline data, 661 * we choose to reserve the entire inline area for 662 * directory contents and force an external xattr block. 663 */ 664 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 665 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || 666 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 667 *want_meta = *want_meta + 1; 668 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 669 } 670 671 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && 672 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { 673 *want_clusters += 1; 674 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); 675 } 676 677 /* 678 * reserve credits and clusters for xattrs which has large value 679 * and have to be set outside 680 */ 681 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { 682 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 683 si->value_len); 684 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 685 new_clusters); 686 *want_clusters += new_clusters; 687 } 688 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && 689 acl_len > OCFS2_XATTR_INLINE_SIZE) { 690 /* for directory, it has DEFAULT and ACCESS two types of acls */ 691 new_clusters = (S_ISDIR(mode) ? 
2 : 1) * 692 ocfs2_clusters_for_bytes(dir->i_sb, acl_len); 693 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 694 new_clusters); 695 *want_clusters += new_clusters; 696 } 697 698 return ret; 699 } 700 701 static int ocfs2_xattr_extend_allocation(struct inode *inode, 702 u32 clusters_to_add, 703 struct ocfs2_xattr_value_buf *vb, 704 struct ocfs2_xattr_set_ctxt *ctxt) 705 { 706 int status = 0, credits; 707 handle_t *handle = ctxt->handle; 708 enum ocfs2_alloc_restarted why; 709 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 710 struct ocfs2_extent_tree et; 711 712 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 713 714 while (clusters_to_add) { 715 trace_ocfs2_xattr_extend_allocation(clusters_to_add); 716 717 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 718 OCFS2_JOURNAL_ACCESS_WRITE); 719 if (status < 0) { 720 mlog_errno(status); 721 break; 722 } 723 724 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 725 status = ocfs2_add_clusters_in_btree(handle, 726 &et, 727 &logical_start, 728 clusters_to_add, 729 0, 730 ctxt->data_ac, 731 ctxt->meta_ac, 732 &why); 733 if ((status < 0) && (status != -EAGAIN)) { 734 if (status != -ENOSPC) 735 mlog_errno(status); 736 break; 737 } 738 739 ocfs2_journal_dirty(handle, vb->vb_bh); 740 741 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - 742 prev_clusters; 743 744 if (why != RESTART_NONE && clusters_to_add) { 745 /* 746 * We can only fail in case the alloc file doesn't give 747 * up enough clusters. 
748 */ 749 BUG_ON(why == RESTART_META); 750 751 credits = ocfs2_calc_extend_credits(inode->i_sb, 752 &vb->vb_xv->xr_list); 753 status = ocfs2_extend_trans(handle, credits); 754 if (status < 0) { 755 status = -ENOMEM; 756 mlog_errno(status); 757 break; 758 } 759 } 760 } 761 762 return status; 763 } 764 765 static int __ocfs2_remove_xattr_range(struct inode *inode, 766 struct ocfs2_xattr_value_buf *vb, 767 u32 cpos, u32 phys_cpos, u32 len, 768 unsigned int ext_flags, 769 struct ocfs2_xattr_set_ctxt *ctxt) 770 { 771 int ret; 772 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 773 handle_t *handle = ctxt->handle; 774 struct ocfs2_extent_tree et; 775 776 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 777 778 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 779 OCFS2_JOURNAL_ACCESS_WRITE); 780 if (ret) { 781 mlog_errno(ret); 782 goto out; 783 } 784 785 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, 786 &ctxt->dealloc); 787 if (ret) { 788 mlog_errno(ret); 789 goto out; 790 } 791 792 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 793 ocfs2_journal_dirty(handle, vb->vb_bh); 794 795 if (ext_flags & OCFS2_EXT_REFCOUNTED) 796 ret = ocfs2_decrease_refcount(inode, handle, 797 ocfs2_blocks_to_clusters(inode->i_sb, 798 phys_blkno), 799 len, ctxt->meta_ac, &ctxt->dealloc, 1); 800 else 801 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, 802 phys_blkno, len); 803 if (ret) 804 mlog_errno(ret); 805 806 out: 807 return ret; 808 } 809 810 static int ocfs2_xattr_shrink_size(struct inode *inode, 811 u32 old_clusters, 812 u32 new_clusters, 813 struct ocfs2_xattr_value_buf *vb, 814 struct ocfs2_xattr_set_ctxt *ctxt) 815 { 816 int ret = 0; 817 unsigned int ext_flags; 818 u32 trunc_len, cpos, phys_cpos, alloc_size; 819 u64 block; 820 821 if (old_clusters <= new_clusters) 822 return 0; 823 824 cpos = new_clusters; 825 trunc_len = old_clusters - new_clusters; 826 while (trunc_len) { 827 ret = ocfs2_xattr_get_clusters(inode, cpos, 
&phys_cpos, 828 &alloc_size, 829 &vb->vb_xv->xr_list, &ext_flags); 830 if (ret) { 831 mlog_errno(ret); 832 goto out; 833 } 834 835 if (alloc_size > trunc_len) 836 alloc_size = trunc_len; 837 838 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 839 phys_cpos, alloc_size, 840 ext_flags, ctxt); 841 if (ret) { 842 mlog_errno(ret); 843 goto out; 844 } 845 846 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 847 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 848 block, alloc_size); 849 cpos += alloc_size; 850 trunc_len -= alloc_size; 851 } 852 853 out: 854 return ret; 855 } 856 857 static int ocfs2_xattr_value_truncate(struct inode *inode, 858 struct ocfs2_xattr_value_buf *vb, 859 int len, 860 struct ocfs2_xattr_set_ctxt *ctxt) 861 { 862 int ret; 863 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 864 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 865 866 if (new_clusters == old_clusters) 867 return 0; 868 869 if (new_clusters > old_clusters) 870 ret = ocfs2_xattr_extend_allocation(inode, 871 new_clusters - old_clusters, 872 vb, ctxt); 873 else 874 ret = ocfs2_xattr_shrink_size(inode, 875 old_clusters, new_clusters, 876 vb, ctxt); 877 878 return ret; 879 } 880 881 static int ocfs2_xattr_list_entry(struct super_block *sb, 882 char *buffer, size_t size, 883 size_t *result, int type, 884 const char *name, int name_len) 885 { 886 char *p = buffer + *result; 887 const char *prefix; 888 int prefix_len; 889 int total_len; 890 891 switch(type) { 892 case OCFS2_XATTR_INDEX_USER: 893 if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 894 return 0; 895 break; 896 897 case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS: 898 case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT: 899 if (!(sb->s_flags & SB_POSIXACL)) 900 return 0; 901 break; 902 903 case OCFS2_XATTR_INDEX_TRUSTED: 904 if (!capable(CAP_SYS_ADMIN)) 905 return 0; 906 break; 907 } 908 909 prefix = ocfs2_xattr_prefix(type); 910 if (!prefix) 911 return 0; 912 prefix_len = strlen(prefix); 913 
total_len = prefix_len + name_len + 1; 914 *result += total_len; 915 916 /* we are just looking for how big our buffer needs to be */ 917 if (!size) 918 return 0; 919 920 if (*result > size) 921 return -ERANGE; 922 923 memcpy(p, prefix, prefix_len); 924 memcpy(p + prefix_len, name, name_len); 925 p[prefix_len + name_len] = '\0'; 926 927 return 0; 928 } 929 930 static int ocfs2_xattr_list_entries(struct inode *inode, 931 struct ocfs2_xattr_header *header, 932 char *buffer, size_t buffer_size) 933 { 934 size_t result = 0; 935 int i, type, ret; 936 const char *name; 937 938 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 939 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 940 type = ocfs2_xattr_get_type(entry); 941 name = (const char *)header + 942 le16_to_cpu(entry->xe_name_offset); 943 944 ret = ocfs2_xattr_list_entry(inode->i_sb, 945 buffer, buffer_size, 946 &result, type, name, 947 entry->xe_name_len); 948 if (ret) 949 return ret; 950 } 951 952 return result; 953 } 954 955 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 956 struct ocfs2_dinode *di) 957 { 958 struct ocfs2_xattr_header *xh; 959 int i; 960 961 xh = (struct ocfs2_xattr_header *) 962 ((void *)di + inode->i_sb->s_blocksize - 963 le16_to_cpu(di->i_xattr_inline_size)); 964 965 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 966 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 967 return 1; 968 969 return 0; 970 } 971 972 static int ocfs2_xattr_ibody_list(struct inode *inode, 973 struct ocfs2_dinode *di, 974 char *buffer, 975 size_t buffer_size) 976 { 977 struct ocfs2_xattr_header *header = NULL; 978 struct ocfs2_inode_info *oi = OCFS2_I(inode); 979 int ret = 0; 980 981 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 982 return ret; 983 984 header = (struct ocfs2_xattr_header *) 985 ((void *)di + inode->i_sb->s_blocksize - 986 le16_to_cpu(di->i_xattr_inline_size)); 987 988 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 989 990 return ret; 991 } 992 993 
static int ocfs2_xattr_block_list(struct inode *inode, 994 struct ocfs2_dinode *di, 995 char *buffer, 996 size_t buffer_size) 997 { 998 struct buffer_head *blk_bh = NULL; 999 struct ocfs2_xattr_block *xb; 1000 int ret = 0; 1001 1002 if (!di->i_xattr_loc) 1003 return ret; 1004 1005 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 1006 &blk_bh); 1007 if (ret < 0) { 1008 mlog_errno(ret); 1009 return ret; 1010 } 1011 1012 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1013 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1014 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 1015 ret = ocfs2_xattr_list_entries(inode, header, 1016 buffer, buffer_size); 1017 } else 1018 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1019 buffer, buffer_size); 1020 1021 brelse(blk_bh); 1022 1023 return ret; 1024 } 1025 1026 ssize_t ocfs2_listxattr(struct dentry *dentry, 1027 char *buffer, 1028 size_t size) 1029 { 1030 int ret = 0, i_ret = 0, b_ret = 0; 1031 struct buffer_head *di_bh = NULL; 1032 struct ocfs2_dinode *di = NULL; 1033 struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry)); 1034 1035 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) 1036 return -EOPNOTSUPP; 1037 1038 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1039 return ret; 1040 1041 ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0); 1042 if (ret < 0) { 1043 mlog_errno(ret); 1044 return ret; 1045 } 1046 1047 di = (struct ocfs2_dinode *)di_bh->b_data; 1048 1049 down_read(&oi->ip_xattr_sem); 1050 i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size); 1051 if (i_ret < 0) 1052 b_ret = 0; 1053 else { 1054 if (buffer) { 1055 buffer += i_ret; 1056 size -= i_ret; 1057 } 1058 b_ret = ocfs2_xattr_block_list(d_inode(dentry), di, 1059 buffer, size); 1060 if (b_ret < 0) 1061 i_ret = 0; 1062 } 1063 up_read(&oi->ip_xattr_sem); 1064 ocfs2_inode_unlock(d_inode(dentry), 0); 1065 1066 brelse(di_bh); 1067 1068 return i_ret + b_ret; 1069 } 1070 1071 static int 
ocfs2_xattr_find_entry(int name_index, 1072 const char *name, 1073 struct ocfs2_xattr_search *xs) 1074 { 1075 struct ocfs2_xattr_entry *entry; 1076 size_t name_len; 1077 int i, cmp = 1; 1078 1079 if (name == NULL) 1080 return -EINVAL; 1081 1082 name_len = strlen(name); 1083 entry = xs->here; 1084 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 1085 cmp = name_index - ocfs2_xattr_get_type(entry); 1086 if (!cmp) 1087 cmp = name_len - entry->xe_name_len; 1088 if (!cmp) 1089 cmp = memcmp(name, (xs->base + 1090 le16_to_cpu(entry->xe_name_offset)), 1091 name_len); 1092 if (cmp == 0) 1093 break; 1094 entry += 1; 1095 } 1096 xs->here = entry; 1097 1098 return cmp ? -ENODATA : 0; 1099 } 1100 1101 static int ocfs2_xattr_get_value_outside(struct inode *inode, 1102 struct ocfs2_xattr_value_root *xv, 1103 void *buffer, 1104 size_t len) 1105 { 1106 u32 cpos, p_cluster, num_clusters, bpc, clusters; 1107 u64 blkno; 1108 int i, ret = 0; 1109 size_t cplen, blocksize; 1110 struct buffer_head *bh = NULL; 1111 struct ocfs2_extent_list *el; 1112 1113 el = &xv->xr_list; 1114 clusters = le32_to_cpu(xv->xr_clusters); 1115 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1116 blocksize = inode->i_sb->s_blocksize; 1117 1118 cpos = 0; 1119 while (cpos < clusters) { 1120 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1121 &num_clusters, el, NULL); 1122 if (ret) { 1123 mlog_errno(ret); 1124 goto out; 1125 } 1126 1127 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1128 /* Copy ocfs2_xattr_value */ 1129 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1130 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1131 &bh, NULL); 1132 if (ret) { 1133 mlog_errno(ret); 1134 goto out; 1135 } 1136 1137 cplen = len >= blocksize ? 
blocksize : len; 1138 memcpy(buffer, bh->b_data, cplen); 1139 len -= cplen; 1140 buffer += cplen; 1141 1142 brelse(bh); 1143 bh = NULL; 1144 if (len == 0) 1145 break; 1146 } 1147 cpos += num_clusters; 1148 } 1149 out: 1150 return ret; 1151 } 1152 1153 static int ocfs2_xattr_ibody_get(struct inode *inode, 1154 int name_index, 1155 const char *name, 1156 void *buffer, 1157 size_t buffer_size, 1158 struct ocfs2_xattr_search *xs) 1159 { 1160 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1161 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1162 struct ocfs2_xattr_value_root *xv; 1163 size_t size; 1164 int ret = 0; 1165 1166 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 1167 return -ENODATA; 1168 1169 xs->end = (void *)di + inode->i_sb->s_blocksize; 1170 xs->header = (struct ocfs2_xattr_header *) 1171 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 1172 xs->base = (void *)xs->header; 1173 xs->here = xs->header->xh_entries; 1174 1175 ret = ocfs2_xattr_find_entry(name_index, name, xs); 1176 if (ret) 1177 return ret; 1178 size = le64_to_cpu(xs->here->xe_value_size); 1179 if (buffer) { 1180 if (size > buffer_size) 1181 return -ERANGE; 1182 if (ocfs2_xattr_is_local(xs->here)) { 1183 memcpy(buffer, (void *)xs->base + 1184 le16_to_cpu(xs->here->xe_name_offset) + 1185 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); 1186 } else { 1187 xv = (struct ocfs2_xattr_value_root *) 1188 (xs->base + le16_to_cpu( 1189 xs->here->xe_name_offset) + 1190 OCFS2_XATTR_SIZE(xs->here->xe_name_len)); 1191 ret = ocfs2_xattr_get_value_outside(inode, xv, 1192 buffer, size); 1193 if (ret < 0) { 1194 mlog_errno(ret); 1195 return ret; 1196 } 1197 } 1198 } 1199 1200 return size; 1201 } 1202 1203 static int ocfs2_xattr_block_get(struct inode *inode, 1204 int name_index, 1205 const char *name, 1206 void *buffer, 1207 size_t buffer_size, 1208 struct ocfs2_xattr_search *xs) 1209 { 1210 struct ocfs2_xattr_block *xb; 1211 struct ocfs2_xattr_value_root *xv; 1212 size_t size; 1213 
int ret = -ENODATA, name_offset, name_len, i; 1214 int uninitialized_var(block_off); 1215 1216 xs->bucket = ocfs2_xattr_bucket_new(inode); 1217 if (!xs->bucket) { 1218 ret = -ENOMEM; 1219 mlog_errno(ret); 1220 goto cleanup; 1221 } 1222 1223 ret = ocfs2_xattr_block_find(inode, name_index, name, xs); 1224 if (ret) { 1225 mlog_errno(ret); 1226 goto cleanup; 1227 } 1228 1229 if (xs->not_found) { 1230 ret = -ENODATA; 1231 goto cleanup; 1232 } 1233 1234 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 1235 size = le64_to_cpu(xs->here->xe_value_size); 1236 if (buffer) { 1237 ret = -ERANGE; 1238 if (size > buffer_size) 1239 goto cleanup; 1240 1241 name_offset = le16_to_cpu(xs->here->xe_name_offset); 1242 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); 1243 i = xs->here - xs->header->xh_entries; 1244 1245 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1246 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 1247 bucket_xh(xs->bucket), 1248 i, 1249 &block_off, 1250 &name_offset); 1251 if (ret) { 1252 mlog_errno(ret); 1253 goto cleanup; 1254 } 1255 xs->base = bucket_block(xs->bucket, block_off); 1256 } 1257 if (ocfs2_xattr_is_local(xs->here)) { 1258 memcpy(buffer, (void *)xs->base + 1259 name_offset + name_len, size); 1260 } else { 1261 xv = (struct ocfs2_xattr_value_root *) 1262 (xs->base + name_offset + name_len); 1263 ret = ocfs2_xattr_get_value_outside(inode, xv, 1264 buffer, size); 1265 if (ret < 0) { 1266 mlog_errno(ret); 1267 goto cleanup; 1268 } 1269 } 1270 } 1271 ret = size; 1272 cleanup: 1273 ocfs2_xattr_bucket_free(xs->bucket); 1274 1275 brelse(xs->xattr_bh); 1276 xs->xattr_bh = NULL; 1277 return ret; 1278 } 1279 1280 int ocfs2_xattr_get_nolock(struct inode *inode, 1281 struct buffer_head *di_bh, 1282 int name_index, 1283 const char *name, 1284 void *buffer, 1285 size_t buffer_size) 1286 { 1287 int ret; 1288 struct ocfs2_dinode *di = NULL; 1289 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1290 struct ocfs2_xattr_search xis = { 1291 .not_found = 
-ENODATA, 1292 }; 1293 struct ocfs2_xattr_search xbs = { 1294 .not_found = -ENODATA, 1295 }; 1296 1297 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 1298 return -EOPNOTSUPP; 1299 1300 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1301 return -ENODATA; 1302 1303 xis.inode_bh = xbs.inode_bh = di_bh; 1304 di = (struct ocfs2_dinode *)di_bh->b_data; 1305 1306 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 1307 buffer_size, &xis); 1308 if (ret == -ENODATA && di->i_xattr_loc) 1309 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1310 buffer_size, &xbs); 1311 1312 return ret; 1313 } 1314 1315 /* ocfs2_xattr_get() 1316 * 1317 * Copy an extended attribute into the buffer provided. 1318 * Buffer is NULL to compute the size of buffer required. 1319 */ 1320 static int ocfs2_xattr_get(struct inode *inode, 1321 int name_index, 1322 const char *name, 1323 void *buffer, 1324 size_t buffer_size) 1325 { 1326 int ret, had_lock; 1327 struct buffer_head *di_bh = NULL; 1328 struct ocfs2_lock_holder oh; 1329 1330 had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh); 1331 if (had_lock < 0) { 1332 mlog_errno(had_lock); 1333 return had_lock; 1334 } 1335 down_read(&OCFS2_I(inode)->ip_xattr_sem); 1336 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, 1337 name, buffer, buffer_size); 1338 up_read(&OCFS2_I(inode)->ip_xattr_sem); 1339 1340 ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); 1341 1342 brelse(di_bh); 1343 1344 return ret; 1345 } 1346 1347 static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1348 handle_t *handle, 1349 struct ocfs2_xattr_value_buf *vb, 1350 const void *value, 1351 int value_len) 1352 { 1353 int ret = 0, i, cp_len; 1354 u16 blocksize = inode->i_sb->s_blocksize; 1355 u32 p_cluster, num_clusters; 1356 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1357 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1358 u64 blkno; 1359 struct buffer_head *bh = NULL; 1360 unsigned int ext_flags; 
1361 struct ocfs2_xattr_value_root *xv = vb->vb_xv; 1362 1363 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); 1364 1365 while (cpos < clusters) { 1366 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1367 &num_clusters, &xv->xr_list, 1368 &ext_flags); 1369 if (ret) { 1370 mlog_errno(ret); 1371 goto out; 1372 } 1373 1374 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 1375 1376 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1377 1378 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1379 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1380 &bh, NULL); 1381 if (ret) { 1382 mlog_errno(ret); 1383 goto out; 1384 } 1385 1386 ret = ocfs2_journal_access(handle, 1387 INODE_CACHE(inode), 1388 bh, 1389 OCFS2_JOURNAL_ACCESS_WRITE); 1390 if (ret < 0) { 1391 mlog_errno(ret); 1392 goto out; 1393 } 1394 1395 cp_len = value_len > blocksize ? blocksize : value_len; 1396 memcpy(bh->b_data, value, cp_len); 1397 value_len -= cp_len; 1398 value += cp_len; 1399 if (cp_len < blocksize) 1400 memset(bh->b_data + cp_len, 0, 1401 blocksize - cp_len); 1402 1403 ocfs2_journal_dirty(handle, bh); 1404 brelse(bh); 1405 bh = NULL; 1406 1407 /* 1408 * XXX: do we need to empty all the following 1409 * blocks in this cluster? 
1410 */ 1411 if (!value_len) 1412 break; 1413 } 1414 cpos += num_clusters; 1415 } 1416 out: 1417 brelse(bh); 1418 1419 return ret; 1420 } 1421 1422 static int ocfs2_xa_check_space_helper(int needed_space, int free_start, 1423 int num_entries) 1424 { 1425 int free_space; 1426 1427 if (!needed_space) 1428 return 0; 1429 1430 free_space = free_start - 1431 sizeof(struct ocfs2_xattr_header) - 1432 (num_entries * sizeof(struct ocfs2_xattr_entry)) - 1433 OCFS2_XATTR_HEADER_GAP; 1434 if (free_space < 0) 1435 return -EIO; 1436 if (free_space < needed_space) 1437 return -ENOSPC; 1438 1439 return 0; 1440 } 1441 1442 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, 1443 int type) 1444 { 1445 return loc->xl_ops->xlo_journal_access(handle, loc, type); 1446 } 1447 1448 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) 1449 { 1450 loc->xl_ops->xlo_journal_dirty(handle, loc); 1451 } 1452 1453 /* Give a pointer into the storage for the given offset */ 1454 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) 1455 { 1456 BUG_ON(offset >= loc->xl_size); 1457 return loc->xl_ops->xlo_offset_pointer(loc, offset); 1458 } 1459 1460 /* 1461 * Wipe the name+value pair and allow the storage to reclaim it. This 1462 * must be followed by either removal of the entry or a call to 1463 * ocfs2_xa_add_namevalue(). 1464 */ 1465 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) 1466 { 1467 loc->xl_ops->xlo_wipe_namevalue(loc); 1468 } 1469 1470 /* 1471 * Find lowest offset to a name+value pair. This is the start of our 1472 * downward-growing free space. 1473 */ 1474 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) 1475 { 1476 return loc->xl_ops->xlo_get_free_start(loc); 1477 } 1478 1479 /* Can we reuse loc->xl_entry for xi? 
*/ 1480 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, 1481 struct ocfs2_xattr_info *xi) 1482 { 1483 return loc->xl_ops->xlo_can_reuse(loc, xi); 1484 } 1485 1486 /* How much free space is needed to set the new value */ 1487 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, 1488 struct ocfs2_xattr_info *xi) 1489 { 1490 return loc->xl_ops->xlo_check_space(loc, xi); 1491 } 1492 1493 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, 1494 struct ocfs2_xattr_info *xi) 1495 { 1496 int size = namevalue_size_xi(xi); 1497 int nameval_offset; 1498 char *nameval_buf; 1499 1500 loc->xl_ops->xlo_add_namevalue(loc, size); 1501 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1502 loc->xl_entry->xe_name_len = xi->xi_name_len; 1503 ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); 1504 ocfs2_xattr_set_local(loc->xl_entry, 1505 xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); 1506 1507 nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1508 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 1509 memset(nameval_buf, 0, size); 1510 memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); 1511 } 1512 1513 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, 1514 struct ocfs2_xattr_value_buf *vb) 1515 { 1516 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1517 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1518 1519 /* Value bufs are for value trees */ 1520 BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); 1521 BUG_ON(namevalue_size_xe(loc->xl_entry) != 1522 (name_size + OCFS2_XATTR_ROOT_SIZE)); 1523 1524 loc->xl_ops->xlo_fill_value_buf(loc, vb); 1525 vb->vb_xv = 1526 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, 1527 nameval_offset + 1528 name_size); 1529 } 1530 1531 static int ocfs2_xa_block_journal_access(handle_t *handle, 1532 struct ocfs2_xa_loc *loc, int type) 1533 { 1534 struct buffer_head *bh = loc->xl_storage; 1535 ocfs2_journal_access_func access; 1536 1537 if 
(loc->xl_size == (bh->b_size - 1538 offsetof(struct ocfs2_xattr_block, 1539 xb_attrs.xb_header))) 1540 access = ocfs2_journal_access_xb; 1541 else 1542 access = ocfs2_journal_access_di; 1543 return access(handle, INODE_CACHE(loc->xl_inode), bh, type); 1544 } 1545 1546 static void ocfs2_xa_block_journal_dirty(handle_t *handle, 1547 struct ocfs2_xa_loc *loc) 1548 { 1549 struct buffer_head *bh = loc->xl_storage; 1550 1551 ocfs2_journal_dirty(handle, bh); 1552 } 1553 1554 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, 1555 int offset) 1556 { 1557 return (char *)loc->xl_header + offset; 1558 } 1559 1560 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, 1561 struct ocfs2_xattr_info *xi) 1562 { 1563 /* 1564 * Block storage is strict. If the sizes aren't exact, we will 1565 * remove the old one and reinsert the new. 1566 */ 1567 return namevalue_size_xe(loc->xl_entry) == 1568 namevalue_size_xi(xi); 1569 } 1570 1571 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) 1572 { 1573 struct ocfs2_xattr_header *xh = loc->xl_header; 1574 int i, count = le16_to_cpu(xh->xh_count); 1575 int offset, free_start = loc->xl_size; 1576 1577 for (i = 0; i < count; i++) { 1578 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1579 if (offset < free_start) 1580 free_start = offset; 1581 } 1582 1583 return free_start; 1584 } 1585 1586 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, 1587 struct ocfs2_xattr_info *xi) 1588 { 1589 int count = le16_to_cpu(loc->xl_header->xh_count); 1590 int free_start = ocfs2_xa_get_free_start(loc); 1591 int needed_space = ocfs2_xi_entry_usage(xi); 1592 1593 /* 1594 * Block storage will reclaim the original entry before inserting 1595 * the new value, so we only need the difference. If the new 1596 * entry is smaller than the old one, we don't need anything. 1597 */ 1598 if (loc->xl_entry) { 1599 /* Don't need space if we're reusing! 
*/ 1600 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1601 needed_space = 0; 1602 else 1603 needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); 1604 } 1605 if (needed_space < 0) 1606 needed_space = 0; 1607 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1608 } 1609 1610 /* 1611 * Block storage for xattrs keeps the name+value pairs compacted. When 1612 * we remove one, we have to shift any that preceded it towards the end. 1613 */ 1614 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) 1615 { 1616 int i, offset; 1617 int namevalue_offset, first_namevalue_offset, namevalue_size; 1618 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1619 struct ocfs2_xattr_header *xh = loc->xl_header; 1620 int count = le16_to_cpu(xh->xh_count); 1621 1622 namevalue_offset = le16_to_cpu(entry->xe_name_offset); 1623 namevalue_size = namevalue_size_xe(entry); 1624 first_namevalue_offset = ocfs2_xa_get_free_start(loc); 1625 1626 /* Shift the name+value pairs */ 1627 memmove((char *)xh + first_namevalue_offset + namevalue_size, 1628 (char *)xh + first_namevalue_offset, 1629 namevalue_offset - first_namevalue_offset); 1630 memset((char *)xh + first_namevalue_offset, 0, namevalue_size); 1631 1632 /* Now tell xh->xh_entries about it */ 1633 for (i = 0; i < count; i++) { 1634 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1635 if (offset <= namevalue_offset) 1636 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1637 namevalue_size); 1638 } 1639 1640 /* 1641 * Note that we don't update xh_free_start or xh_name_value_len 1642 * because they're not used in block-stored xattrs. 
1643 */ 1644 } 1645 1646 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1647 { 1648 int count = le16_to_cpu(loc->xl_header->xh_count); 1649 loc->xl_entry = &(loc->xl_header->xh_entries[count]); 1650 le16_add_cpu(&loc->xl_header->xh_count, 1); 1651 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1652 } 1653 1654 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1655 { 1656 int free_start = ocfs2_xa_get_free_start(loc); 1657 1658 loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); 1659 } 1660 1661 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, 1662 struct ocfs2_xattr_value_buf *vb) 1663 { 1664 struct buffer_head *bh = loc->xl_storage; 1665 1666 if (loc->xl_size == (bh->b_size - 1667 offsetof(struct ocfs2_xattr_block, 1668 xb_attrs.xb_header))) 1669 vb->vb_access = ocfs2_journal_access_xb; 1670 else 1671 vb->vb_access = ocfs2_journal_access_di; 1672 vb->vb_bh = bh; 1673 } 1674 1675 /* 1676 * Operations for xattrs stored in blocks. This includes inline inode 1677 * storage and unindexed ocfs2_xattr_blocks. 
1678 */ 1679 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { 1680 .xlo_journal_access = ocfs2_xa_block_journal_access, 1681 .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, 1682 .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, 1683 .xlo_check_space = ocfs2_xa_block_check_space, 1684 .xlo_can_reuse = ocfs2_xa_block_can_reuse, 1685 .xlo_get_free_start = ocfs2_xa_block_get_free_start, 1686 .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, 1687 .xlo_add_entry = ocfs2_xa_block_add_entry, 1688 .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, 1689 .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, 1690 }; 1691 1692 static int ocfs2_xa_bucket_journal_access(handle_t *handle, 1693 struct ocfs2_xa_loc *loc, int type) 1694 { 1695 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1696 1697 return ocfs2_xattr_bucket_journal_access(handle, bucket, type); 1698 } 1699 1700 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, 1701 struct ocfs2_xa_loc *loc) 1702 { 1703 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1704 1705 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 1706 } 1707 1708 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, 1709 int offset) 1710 { 1711 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1712 int block, block_offset; 1713 1714 /* The header is at the front of the bucket */ 1715 block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; 1716 block_offset = offset % loc->xl_inode->i_sb->s_blocksize; 1717 1718 return bucket_block(bucket, block) + block_offset; 1719 } 1720 1721 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, 1722 struct ocfs2_xattr_info *xi) 1723 { 1724 return namevalue_size_xe(loc->xl_entry) >= 1725 namevalue_size_xi(xi); 1726 } 1727 1728 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) 1729 { 1730 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1731 return le16_to_cpu(bucket_xh(bucket)->xh_free_start); 1732 } 1733 1734 static 
int ocfs2_bucket_align_free_start(struct super_block *sb, 1735 int free_start, int size) 1736 { 1737 /* 1738 * We need to make sure that the name+value pair fits within 1739 * one block. 1740 */ 1741 if (((free_start - size) >> sb->s_blocksize_bits) != 1742 ((free_start - 1) >> sb->s_blocksize_bits)) 1743 free_start -= free_start % sb->s_blocksize; 1744 1745 return free_start; 1746 } 1747 1748 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, 1749 struct ocfs2_xattr_info *xi) 1750 { 1751 int rc; 1752 int count = le16_to_cpu(loc->xl_header->xh_count); 1753 int free_start = ocfs2_xa_get_free_start(loc); 1754 int needed_space = ocfs2_xi_entry_usage(xi); 1755 int size = namevalue_size_xi(xi); 1756 struct super_block *sb = loc->xl_inode->i_sb; 1757 1758 /* 1759 * Bucket storage does not reclaim name+value pairs it cannot 1760 * reuse. They live as holes until the bucket fills, and then 1761 * the bucket is defragmented. However, the bucket can reclaim 1762 * the ocfs2_xattr_entry. 1763 */ 1764 if (loc->xl_entry) { 1765 /* Don't need space if we're reusing! */ 1766 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1767 needed_space = 0; 1768 else 1769 needed_space -= sizeof(struct ocfs2_xattr_entry); 1770 } 1771 BUG_ON(needed_space < 0); 1772 1773 if (free_start < size) { 1774 if (needed_space) 1775 return -ENOSPC; 1776 } else { 1777 /* 1778 * First we check if it would fit in the first place. 1779 * Below, we align the free start to a block. This may 1780 * slide us below the minimum gap. By checking unaligned 1781 * first, we avoid that error. 
1782 */ 1783 rc = ocfs2_xa_check_space_helper(needed_space, free_start, 1784 count); 1785 if (rc) 1786 return rc; 1787 free_start = ocfs2_bucket_align_free_start(sb, free_start, 1788 size); 1789 } 1790 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1791 } 1792 1793 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) 1794 { 1795 le16_add_cpu(&loc->xl_header->xh_name_value_len, 1796 -namevalue_size_xe(loc->xl_entry)); 1797 } 1798 1799 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1800 { 1801 struct ocfs2_xattr_header *xh = loc->xl_header; 1802 int count = le16_to_cpu(xh->xh_count); 1803 int low = 0, high = count - 1, tmp; 1804 struct ocfs2_xattr_entry *tmp_xe; 1805 1806 /* 1807 * We keep buckets sorted by name_hash, so we need to find 1808 * our insert place. 1809 */ 1810 while (low <= high && count) { 1811 tmp = (low + high) / 2; 1812 tmp_xe = &xh->xh_entries[tmp]; 1813 1814 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 1815 low = tmp + 1; 1816 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) 1817 high = tmp - 1; 1818 else { 1819 low = tmp; 1820 break; 1821 } 1822 } 1823 1824 if (low != count) 1825 memmove(&xh->xh_entries[low + 1], 1826 &xh->xh_entries[low], 1827 ((count - low) * sizeof(struct ocfs2_xattr_entry))); 1828 1829 le16_add_cpu(&xh->xh_count, 1); 1830 loc->xl_entry = &xh->xh_entries[low]; 1831 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1832 } 1833 1834 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1835 { 1836 int free_start = ocfs2_xa_get_free_start(loc); 1837 struct ocfs2_xattr_header *xh = loc->xl_header; 1838 struct super_block *sb = loc->xl_inode->i_sb; 1839 int nameval_offset; 1840 1841 free_start = ocfs2_bucket_align_free_start(sb, free_start, size); 1842 nameval_offset = free_start - size; 1843 loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); 1844 xh->xh_free_start = cpu_to_le16(nameval_offset); 1845 
le16_add_cpu(&xh->xh_name_value_len, size); 1846 1847 } 1848 1849 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, 1850 struct ocfs2_xattr_value_buf *vb) 1851 { 1852 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1853 struct super_block *sb = loc->xl_inode->i_sb; 1854 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1855 int size = namevalue_size_xe(loc->xl_entry); 1856 int block_offset = nameval_offset >> sb->s_blocksize_bits; 1857 1858 /* Values are not allowed to straddle block boundaries */ 1859 BUG_ON(block_offset != 1860 ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); 1861 /* We expect the bucket to be filled in */ 1862 BUG_ON(!bucket->bu_bhs[block_offset]); 1863 1864 vb->vb_access = ocfs2_journal_access; 1865 vb->vb_bh = bucket->bu_bhs[block_offset]; 1866 } 1867 1868 /* Operations for xattrs stored in buckets. */ 1869 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { 1870 .xlo_journal_access = ocfs2_xa_bucket_journal_access, 1871 .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, 1872 .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, 1873 .xlo_check_space = ocfs2_xa_bucket_check_space, 1874 .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, 1875 .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, 1876 .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, 1877 .xlo_add_entry = ocfs2_xa_bucket_add_entry, 1878 .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, 1879 .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, 1880 }; 1881 1882 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) 1883 { 1884 struct ocfs2_xattr_value_buf vb; 1885 1886 if (ocfs2_xattr_is_local(loc->xl_entry)) 1887 return 0; 1888 1889 ocfs2_xa_fill_value_buf(loc, &vb); 1890 return le32_to_cpu(vb.vb_xv->xr_clusters); 1891 } 1892 1893 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, 1894 struct ocfs2_xattr_set_ctxt *ctxt) 1895 { 1896 int trunc_rc, access_rc; 1897 struct 
ocfs2_xattr_value_buf vb; 1898 1899 ocfs2_xa_fill_value_buf(loc, &vb); 1900 trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1901 ctxt); 1902 1903 /* 1904 * The caller of ocfs2_xa_value_truncate() has already called 1905 * ocfs2_xa_journal_access on the loc. However, The truncate code 1906 * calls ocfs2_extend_trans(). This may commit the previous 1907 * transaction and open a new one. If this is a bucket, truncate 1908 * could leave only vb->vb_bh set up for journaling. Meanwhile, 1909 * the caller is expecting to dirty the entire bucket. So we must 1910 * reset the journal work. We do this even if truncate has failed, 1911 * as it could have failed after committing the extend. 1912 */ 1913 access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1914 OCFS2_JOURNAL_ACCESS_WRITE); 1915 1916 /* Errors in truncate take precedence */ 1917 return trunc_rc ? trunc_rc : access_rc; 1918 } 1919 1920 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1921 { 1922 int index, count; 1923 struct ocfs2_xattr_header *xh = loc->xl_header; 1924 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1925 1926 ocfs2_xa_wipe_namevalue(loc); 1927 loc->xl_entry = NULL; 1928 1929 le16_add_cpu(&xh->xh_count, -1); 1930 count = le16_to_cpu(xh->xh_count); 1931 1932 /* 1933 * Only zero out the entry if there are more remaining. This is 1934 * important for an empty bucket, as it keeps track of the 1935 * bucket's hash value. It doesn't hurt empty block storage. 
1936 */ 1937 if (count) { 1938 index = ((char *)entry - (char *)&xh->xh_entries) / 1939 sizeof(struct ocfs2_xattr_entry); 1940 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1941 (count - index) * sizeof(struct ocfs2_xattr_entry)); 1942 memset(&xh->xh_entries[count], 0, 1943 sizeof(struct ocfs2_xattr_entry)); 1944 } 1945 } 1946 1947 /* 1948 * If we have a problem adjusting the size of an external value during 1949 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 1950 * in an intermediate state. For example, the value may be partially 1951 * truncated. 1952 * 1953 * If the value tree hasn't changed, the extend/truncate went nowhere. 1954 * We have nothing to do. The caller can treat it as a straight error. 1955 * 1956 * If the value tree got partially truncated, we now have a corrupted 1957 * extended attribute. We're going to wipe its entry and leak the 1958 * clusters. Better to leak some storage than leave a corrupt entry. 1959 * 1960 * If the value tree grew, it obviously didn't grow enough for the 1961 * new entry. We're not going to try and reclaim those clusters either. 1962 * If there was already an external value there (orig_clusters != 0), 1963 * the new clusters are attached safely and we can just leave the old 1964 * value in place. If there was no external value there, we remove 1965 * the entry. 1966 * 1967 * This way, the xattr block we store in the journal will be consistent. 1968 * If the size change broke because of the journal, no changes will hit 1969 * disk anyway. 1970 */ 1971 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 1972 const char *what, 1973 unsigned int orig_clusters) 1974 { 1975 unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 1976 char *nameval_buf = ocfs2_xa_offset_pointer(loc, 1977 le16_to_cpu(loc->xl_entry->xe_name_offset)); 1978 1979 if (new_clusters < orig_clusters) { 1980 mlog(ML_ERROR, 1981 "Partial truncate while %s xattr %.*s. 
Leaking " 1982 "%u clusters and removing the entry\n", 1983 what, loc->xl_entry->xe_name_len, nameval_buf, 1984 orig_clusters - new_clusters); 1985 ocfs2_xa_remove_entry(loc); 1986 } else if (!orig_clusters) { 1987 mlog(ML_ERROR, 1988 "Unable to allocate an external value for xattr " 1989 "%.*s safely. Leaking %u clusters and removing the " 1990 "entry\n", 1991 loc->xl_entry->xe_name_len, nameval_buf, 1992 new_clusters - orig_clusters); 1993 ocfs2_xa_remove_entry(loc); 1994 } else if (new_clusters > orig_clusters) 1995 mlog(ML_ERROR, 1996 "Unable to grow xattr %.*s safely. %u new clusters " 1997 "have been added, but the value will not be " 1998 "modified\n", 1999 loc->xl_entry->xe_name_len, nameval_buf, 2000 new_clusters - orig_clusters); 2001 } 2002 2003 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 2004 struct ocfs2_xattr_set_ctxt *ctxt) 2005 { 2006 int rc = 0; 2007 unsigned int orig_clusters; 2008 2009 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2010 orig_clusters = ocfs2_xa_value_clusters(loc); 2011 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2012 if (rc) { 2013 mlog_errno(rc); 2014 /* 2015 * Since this is remove, we can return 0 if 2016 * ocfs2_xa_cleanup_value_truncate() is going to 2017 * wipe the entry anyway. So we check the 2018 * cluster count as well. 2019 */ 2020 if (orig_clusters != ocfs2_xa_value_clusters(loc)) 2021 rc = 0; 2022 ocfs2_xa_cleanup_value_truncate(loc, "removing", 2023 orig_clusters); 2024 if (rc) 2025 goto out; 2026 } 2027 } 2028 2029 ocfs2_xa_remove_entry(loc); 2030 2031 out: 2032 return rc; 2033 } 2034 2035 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 2036 { 2037 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 2038 char *nameval_buf; 2039 2040 nameval_buf = ocfs2_xa_offset_pointer(loc, 2041 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2042 memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 2043 } 2044 2045 /* 2046 * Take an existing entry and make it ready for the new value. 
This 2047 * won't allocate space, but it may free space. It should be ready for 2048 * ocfs2_xa_prepare_entry() to finish the work. 2049 */ 2050 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc, 2051 struct ocfs2_xattr_info *xi, 2052 struct ocfs2_xattr_set_ctxt *ctxt) 2053 { 2054 int rc = 0; 2055 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2056 unsigned int orig_clusters; 2057 char *nameval_buf; 2058 int xe_local = ocfs2_xattr_is_local(loc->xl_entry); 2059 int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE; 2060 2061 BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) != 2062 name_size); 2063 2064 nameval_buf = ocfs2_xa_offset_pointer(loc, 2065 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2066 if (xe_local) { 2067 memset(nameval_buf + name_size, 0, 2068 namevalue_size_xe(loc->xl_entry) - name_size); 2069 if (!xi_local) 2070 ocfs2_xa_install_value_root(loc); 2071 } else { 2072 orig_clusters = ocfs2_xa_value_clusters(loc); 2073 if (xi_local) { 2074 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2075 if (rc < 0) 2076 mlog_errno(rc); 2077 else 2078 memset(nameval_buf + name_size, 0, 2079 namevalue_size_xe(loc->xl_entry) - 2080 name_size); 2081 } else if (le64_to_cpu(loc->xl_entry->xe_value_size) > 2082 xi->xi_value_len) { 2083 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, 2084 ctxt); 2085 if (rc < 0) 2086 mlog_errno(rc); 2087 } 2088 2089 if (rc) { 2090 ocfs2_xa_cleanup_value_truncate(loc, "reusing", 2091 orig_clusters); 2092 goto out; 2093 } 2094 } 2095 2096 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 2097 ocfs2_xattr_set_local(loc->xl_entry, xi_local); 2098 2099 out: 2100 return rc; 2101 } 2102 2103 /* 2104 * Prepares loc->xl_entry to receive the new xattr. This includes 2105 * properly setting up the name+value pair region. If loc->xl_entry 2106 * already exists, it will take care of modifying it appropriately. 2107 * 2108 * Note that this modifies the data. You did journal_access already, 2109 * right? 
2110 */ 2111 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, 2112 struct ocfs2_xattr_info *xi, 2113 u32 name_hash, 2114 struct ocfs2_xattr_set_ctxt *ctxt) 2115 { 2116 int rc = 0; 2117 unsigned int orig_clusters; 2118 __le64 orig_value_size = 0; 2119 2120 rc = ocfs2_xa_check_space(loc, xi); 2121 if (rc) 2122 goto out; 2123 2124 if (!loc->xl_entry) { 2125 rc = -EINVAL; 2126 goto out; 2127 } 2128 2129 if (ocfs2_xa_can_reuse_entry(loc, xi)) { 2130 orig_value_size = loc->xl_entry->xe_value_size; 2131 rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); 2132 if (rc) 2133 goto out; 2134 goto alloc_value; 2135 } 2136 2137 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2138 orig_clusters = ocfs2_xa_value_clusters(loc); 2139 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2140 if (rc) { 2141 mlog_errno(rc); 2142 ocfs2_xa_cleanup_value_truncate(loc, 2143 "overwriting", 2144 orig_clusters); 2145 goto out; 2146 } 2147 } 2148 ocfs2_xa_wipe_namevalue(loc); 2149 2150 /* 2151 * If we get here, we have a blank entry. Fill it. We grow our 2152 * name+value pair back from the end. 2153 */ 2154 ocfs2_xa_add_namevalue(loc, xi); 2155 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 2156 ocfs2_xa_install_value_root(loc); 2157 2158 alloc_value: 2159 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2160 orig_clusters = ocfs2_xa_value_clusters(loc); 2161 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); 2162 if (rc < 0) { 2163 ctxt->set_abort = 1; 2164 ocfs2_xa_cleanup_value_truncate(loc, "growing", 2165 orig_clusters); 2166 /* 2167 * If we were growing an existing value, 2168 * ocfs2_xa_cleanup_value_truncate() won't remove 2169 * the entry. We need to restore the original value 2170 * size. 2171 */ 2172 if (loc->xl_entry) { 2173 BUG_ON(!orig_value_size); 2174 loc->xl_entry->xe_value_size = orig_value_size; 2175 } 2176 mlog_errno(rc); 2177 } 2178 } 2179 2180 out: 2181 return rc; 2182 } 2183 2184 /* 2185 * Store the value portion of the name+value pair. 
This will skip 2186 * values that are stored externally. Their tree roots were set up 2187 * by ocfs2_xa_prepare_entry(). 2188 */ 2189 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, 2190 struct ocfs2_xattr_info *xi, 2191 struct ocfs2_xattr_set_ctxt *ctxt) 2192 { 2193 int rc = 0; 2194 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 2195 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2196 char *nameval_buf; 2197 struct ocfs2_xattr_value_buf vb; 2198 2199 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 2200 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2201 ocfs2_xa_fill_value_buf(loc, &vb); 2202 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, 2203 ctxt->handle, &vb, 2204 xi->xi_value, 2205 xi->xi_value_len); 2206 } else 2207 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); 2208 2209 return rc; 2210 } 2211 2212 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, 2213 struct ocfs2_xattr_info *xi, 2214 struct ocfs2_xattr_set_ctxt *ctxt) 2215 { 2216 int ret; 2217 u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, 2218 xi->xi_name_len); 2219 2220 ret = ocfs2_xa_journal_access(ctxt->handle, loc, 2221 OCFS2_JOURNAL_ACCESS_WRITE); 2222 if (ret) { 2223 mlog_errno(ret); 2224 goto out; 2225 } 2226 2227 /* 2228 * From here on out, everything is going to modify the buffer a 2229 * little. Errors are going to leave the xattr header in a 2230 * sane state. Thus, even with errors we dirty the sucker. 
2231 */ 2232 2233 /* Don't worry, we are never called with !xi_value and !xl_entry */ 2234 if (!xi->xi_value) { 2235 ret = ocfs2_xa_remove(loc, ctxt); 2236 goto out_dirty; 2237 } 2238 2239 ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); 2240 if (ret) { 2241 if (ret != -ENOSPC) 2242 mlog_errno(ret); 2243 goto out_dirty; 2244 } 2245 2246 ret = ocfs2_xa_store_value(loc, xi, ctxt); 2247 if (ret) 2248 mlog_errno(ret); 2249 2250 out_dirty: 2251 ocfs2_xa_journal_dirty(ctxt->handle, loc); 2252 2253 out: 2254 return ret; 2255 } 2256 2257 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, 2258 struct inode *inode, 2259 struct buffer_head *bh, 2260 struct ocfs2_xattr_entry *entry) 2261 { 2262 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2263 2264 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); 2265 2266 loc->xl_inode = inode; 2267 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2268 loc->xl_storage = bh; 2269 loc->xl_entry = entry; 2270 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); 2271 loc->xl_header = 2272 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - 2273 loc->xl_size); 2274 } 2275 2276 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, 2277 struct inode *inode, 2278 struct buffer_head *bh, 2279 struct ocfs2_xattr_entry *entry) 2280 { 2281 struct ocfs2_xattr_block *xb = 2282 (struct ocfs2_xattr_block *)bh->b_data; 2283 2284 BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); 2285 2286 loc->xl_inode = inode; 2287 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2288 loc->xl_storage = bh; 2289 loc->xl_header = &(xb->xb_attrs.xb_header); 2290 loc->xl_entry = entry; 2291 loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, 2292 xb_attrs.xb_header); 2293 } 2294 2295 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, 2296 struct ocfs2_xattr_bucket *bucket, 2297 struct ocfs2_xattr_entry *entry) 2298 { 2299 loc->xl_inode = bucket->bu_inode; 2300 loc->xl_ops = 
&ocfs2_xa_bucket_loc_ops; 2301 loc->xl_storage = bucket; 2302 loc->xl_header = bucket_xh(bucket); 2303 loc->xl_entry = entry; 2304 loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; 2305 } 2306 2307 /* 2308 * In xattr remove, if it is stored outside and refcounted, we may have 2309 * the chance to split the refcount tree. So need the allocators. 2310 */ 2311 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, 2312 struct ocfs2_xattr_value_root *xv, 2313 struct ocfs2_caching_info *ref_ci, 2314 struct buffer_head *ref_root_bh, 2315 struct ocfs2_alloc_context **meta_ac, 2316 int *ref_credits) 2317 { 2318 int ret, meta_add = 0; 2319 u32 p_cluster, num_clusters; 2320 unsigned int ext_flags; 2321 2322 *ref_credits = 0; 2323 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 2324 &num_clusters, 2325 &xv->xr_list, 2326 &ext_flags); 2327 if (ret) { 2328 mlog_errno(ret); 2329 goto out; 2330 } 2331 2332 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 2333 goto out; 2334 2335 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, 2336 ref_root_bh, xv, 2337 &meta_add, ref_credits); 2338 if (ret) { 2339 mlog_errno(ret); 2340 goto out; 2341 } 2342 2343 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2344 meta_add, meta_ac); 2345 if (ret) 2346 mlog_errno(ret); 2347 2348 out: 2349 return ret; 2350 } 2351 2352 static int ocfs2_remove_value_outside(struct inode*inode, 2353 struct ocfs2_xattr_value_buf *vb, 2354 struct ocfs2_xattr_header *header, 2355 struct ocfs2_caching_info *ref_ci, 2356 struct buffer_head *ref_root_bh) 2357 { 2358 int ret = 0, i, ref_credits; 2359 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2360 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2361 void *val; 2362 2363 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 2364 2365 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 2366 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 2367 2368 if (ocfs2_xattr_is_local(entry)) 2369 continue; 2370 2371 val = (void *)header + 2372 
le16_to_cpu(entry->xe_name_offset); 2373 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2374 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2375 2376 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2377 ref_ci, ref_root_bh, 2378 &ctxt.meta_ac, 2379 &ref_credits); 2380 2381 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2382 ocfs2_remove_extent_credits(osb->sb)); 2383 if (IS_ERR(ctxt.handle)) { 2384 ret = PTR_ERR(ctxt.handle); 2385 mlog_errno(ret); 2386 break; 2387 } 2388 2389 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2390 2391 ocfs2_commit_trans(osb, ctxt.handle); 2392 if (ctxt.meta_ac) { 2393 ocfs2_free_alloc_context(ctxt.meta_ac); 2394 ctxt.meta_ac = NULL; 2395 } 2396 2397 if (ret < 0) { 2398 mlog_errno(ret); 2399 break; 2400 } 2401 2402 } 2403 2404 if (ctxt.meta_ac) 2405 ocfs2_free_alloc_context(ctxt.meta_ac); 2406 ocfs2_schedule_truncate_log_flush(osb, 1); 2407 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2408 return ret; 2409 } 2410 2411 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2412 struct buffer_head *di_bh, 2413 struct ocfs2_caching_info *ref_ci, 2414 struct buffer_head *ref_root_bh) 2415 { 2416 2417 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2418 struct ocfs2_xattr_header *header; 2419 int ret; 2420 struct ocfs2_xattr_value_buf vb = { 2421 .vb_bh = di_bh, 2422 .vb_access = ocfs2_journal_access_di, 2423 }; 2424 2425 header = (struct ocfs2_xattr_header *) 2426 ((void *)di + inode->i_sb->s_blocksize - 2427 le16_to_cpu(di->i_xattr_inline_size)); 2428 2429 ret = ocfs2_remove_value_outside(inode, &vb, header, 2430 ref_ci, ref_root_bh); 2431 2432 return ret; 2433 } 2434 2435 struct ocfs2_rm_xattr_bucket_para { 2436 struct ocfs2_caching_info *ref_ci; 2437 struct buffer_head *ref_root_bh; 2438 }; 2439 2440 static int ocfs2_xattr_block_remove(struct inode *inode, 2441 struct buffer_head *blk_bh, 2442 struct ocfs2_caching_info *ref_ci, 2443 struct buffer_head *ref_root_bh) 2444 { 2445 struct ocfs2_xattr_block 
*xb; 2446 int ret = 0; 2447 struct ocfs2_xattr_value_buf vb = { 2448 .vb_bh = blk_bh, 2449 .vb_access = ocfs2_journal_access_xb, 2450 }; 2451 struct ocfs2_rm_xattr_bucket_para args = { 2452 .ref_ci = ref_ci, 2453 .ref_root_bh = ref_root_bh, 2454 }; 2455 2456 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2457 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2458 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 2459 ret = ocfs2_remove_value_outside(inode, &vb, header, 2460 ref_ci, ref_root_bh); 2461 } else 2462 ret = ocfs2_iterate_xattr_index_block(inode, 2463 blk_bh, 2464 ocfs2_rm_xattr_cluster, 2465 &args); 2466 2467 return ret; 2468 } 2469 2470 static int ocfs2_xattr_free_block(struct inode *inode, 2471 u64 block, 2472 struct ocfs2_caching_info *ref_ci, 2473 struct buffer_head *ref_root_bh) 2474 { 2475 struct inode *xb_alloc_inode; 2476 struct buffer_head *xb_alloc_bh = NULL; 2477 struct buffer_head *blk_bh = NULL; 2478 struct ocfs2_xattr_block *xb; 2479 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2480 handle_t *handle; 2481 int ret = 0; 2482 u64 blk, bg_blkno; 2483 u16 bit; 2484 2485 ret = ocfs2_read_xattr_block(inode, block, &blk_bh); 2486 if (ret < 0) { 2487 mlog_errno(ret); 2488 goto out; 2489 } 2490 2491 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); 2492 if (ret < 0) { 2493 mlog_errno(ret); 2494 goto out; 2495 } 2496 2497 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2498 blk = le64_to_cpu(xb->xb_blkno); 2499 bit = le16_to_cpu(xb->xb_suballoc_bit); 2500 if (xb->xb_suballoc_loc) 2501 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); 2502 else 2503 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2504 2505 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2506 EXTENT_ALLOC_SYSTEM_INODE, 2507 le16_to_cpu(xb->xb_suballoc_slot)); 2508 if (!xb_alloc_inode) { 2509 ret = -ENOMEM; 2510 mlog_errno(ret); 2511 goto out; 2512 } 2513 inode_lock(xb_alloc_inode); 2514 2515 ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 
1); 2516 if (ret < 0) { 2517 mlog_errno(ret); 2518 goto out_mutex; 2519 } 2520 2521 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 2522 if (IS_ERR(handle)) { 2523 ret = PTR_ERR(handle); 2524 mlog_errno(ret); 2525 goto out_unlock; 2526 } 2527 2528 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, 2529 bit, bg_blkno, 1); 2530 if (ret < 0) 2531 mlog_errno(ret); 2532 2533 ocfs2_commit_trans(osb, handle); 2534 out_unlock: 2535 ocfs2_inode_unlock(xb_alloc_inode, 1); 2536 brelse(xb_alloc_bh); 2537 out_mutex: 2538 inode_unlock(xb_alloc_inode); 2539 iput(xb_alloc_inode); 2540 out: 2541 brelse(blk_bh); 2542 return ret; 2543 } 2544 2545 /* 2546 * ocfs2_xattr_remove() 2547 * 2548 * Free extended attribute resources associated with this inode. 2549 */ 2550 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) 2551 { 2552 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2553 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2554 struct ocfs2_refcount_tree *ref_tree = NULL; 2555 struct buffer_head *ref_root_bh = NULL; 2556 struct ocfs2_caching_info *ref_ci = NULL; 2557 handle_t *handle; 2558 int ret; 2559 2560 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2561 return 0; 2562 2563 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2564 return 0; 2565 2566 if (ocfs2_is_refcount_inode(inode)) { 2567 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), 2568 le64_to_cpu(di->i_refcount_loc), 2569 1, &ref_tree, &ref_root_bh); 2570 if (ret) { 2571 mlog_errno(ret); 2572 goto out; 2573 } 2574 ref_ci = &ref_tree->rf_ci; 2575 2576 } 2577 2578 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2579 ret = ocfs2_xattr_ibody_remove(inode, di_bh, 2580 ref_ci, ref_root_bh); 2581 if (ret < 0) { 2582 mlog_errno(ret); 2583 goto out; 2584 } 2585 } 2586 2587 if (di->i_xattr_loc) { 2588 ret = ocfs2_xattr_free_block(inode, 2589 le64_to_cpu(di->i_xattr_loc), 2590 ref_ci, ref_root_bh); 2591 if (ret < 0) { 2592 mlog_errno(ret); 2593 goto out; 2594 } 
2595 } 2596 2597 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 2598 OCFS2_INODE_UPDATE_CREDITS); 2599 if (IS_ERR(handle)) { 2600 ret = PTR_ERR(handle); 2601 mlog_errno(ret); 2602 goto out; 2603 } 2604 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 2605 OCFS2_JOURNAL_ACCESS_WRITE); 2606 if (ret) { 2607 mlog_errno(ret); 2608 goto out_commit; 2609 } 2610 2611 di->i_xattr_loc = 0; 2612 2613 spin_lock(&oi->ip_lock); 2614 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2615 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2616 spin_unlock(&oi->ip_lock); 2617 ocfs2_update_inode_fsync_trans(handle, inode, 0); 2618 2619 ocfs2_journal_dirty(handle, di_bh); 2620 out_commit: 2621 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2622 out: 2623 if (ref_tree) 2624 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); 2625 brelse(ref_root_bh); 2626 return ret; 2627 } 2628 2629 static int ocfs2_xattr_has_space_inline(struct inode *inode, 2630 struct ocfs2_dinode *di) 2631 { 2632 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2633 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; 2634 int free; 2635 2636 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) 2637 return 0; 2638 2639 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2640 struct ocfs2_inline_data *idata = &di->id2.i_data; 2641 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); 2642 } else if (ocfs2_inode_is_fast_symlink(inode)) { 2643 free = ocfs2_fast_symlink_chars(inode->i_sb) - 2644 le64_to_cpu(di->i_size); 2645 } else { 2646 struct ocfs2_extent_list *el = &di->id2.i_list; 2647 free = (le16_to_cpu(el->l_count) - 2648 le16_to_cpu(el->l_next_free_rec)) * 2649 sizeof(struct ocfs2_extent_rec); 2650 } 2651 if (free >= xattrsize) 2652 return 1; 2653 2654 return 0; 2655 } 2656 2657 /* 2658 * ocfs2_xattr_ibody_find() 2659 * 2660 * Find extended attribute in inode block and 2661 * fill search info into struct ocfs2_xattr_search. 
2662 */ 2663 static int ocfs2_xattr_ibody_find(struct inode *inode, 2664 int name_index, 2665 const char *name, 2666 struct ocfs2_xattr_search *xs) 2667 { 2668 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2669 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2670 int ret; 2671 int has_space = 0; 2672 2673 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2674 return 0; 2675 2676 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2677 down_read(&oi->ip_alloc_sem); 2678 has_space = ocfs2_xattr_has_space_inline(inode, di); 2679 up_read(&oi->ip_alloc_sem); 2680 if (!has_space) 2681 return 0; 2682 } 2683 2684 xs->xattr_bh = xs->inode_bh; 2685 xs->end = (void *)di + inode->i_sb->s_blocksize; 2686 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) 2687 xs->header = (struct ocfs2_xattr_header *) 2688 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 2689 else 2690 xs->header = (struct ocfs2_xattr_header *) 2691 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); 2692 xs->base = (void *)xs->header; 2693 xs->here = xs->header->xh_entries; 2694 2695 /* Find the named attribute. 
*/ 2696 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2697 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2698 if (ret && ret != -ENODATA) 2699 return ret; 2700 xs->not_found = ret; 2701 } 2702 2703 return 0; 2704 } 2705 2706 static int ocfs2_xattr_ibody_init(struct inode *inode, 2707 struct buffer_head *di_bh, 2708 struct ocfs2_xattr_set_ctxt *ctxt) 2709 { 2710 int ret; 2711 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2712 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2713 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2714 unsigned int xattrsize = osb->s_xattr_inline_size; 2715 2716 if (!ocfs2_xattr_has_space_inline(inode, di)) { 2717 ret = -ENOSPC; 2718 goto out; 2719 } 2720 2721 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, 2722 OCFS2_JOURNAL_ACCESS_WRITE); 2723 if (ret) { 2724 mlog_errno(ret); 2725 goto out; 2726 } 2727 2728 /* 2729 * Adjust extent record count or inline data size 2730 * to reserve space for extended attribute. 2731 */ 2732 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2733 struct ocfs2_inline_data *idata = &di->id2.i_data; 2734 le16_add_cpu(&idata->id_count, -xattrsize); 2735 } else if (!(ocfs2_inode_is_fast_symlink(inode))) { 2736 struct ocfs2_extent_list *el = &di->id2.i_list; 2737 le16_add_cpu(&el->l_count, -(xattrsize / 2738 sizeof(struct ocfs2_extent_rec))); 2739 } 2740 di->i_xattr_inline_size = cpu_to_le16(xattrsize); 2741 2742 spin_lock(&oi->ip_lock); 2743 oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; 2744 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2745 spin_unlock(&oi->ip_lock); 2746 2747 ocfs2_journal_dirty(ctxt->handle, di_bh); 2748 2749 out: 2750 return ret; 2751 } 2752 2753 /* 2754 * ocfs2_xattr_ibody_set() 2755 * 2756 * Set, replace or remove an extended attribute into inode block. 
2757 * 2758 */ 2759 static int ocfs2_xattr_ibody_set(struct inode *inode, 2760 struct ocfs2_xattr_info *xi, 2761 struct ocfs2_xattr_search *xs, 2762 struct ocfs2_xattr_set_ctxt *ctxt) 2763 { 2764 int ret; 2765 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2766 struct ocfs2_xa_loc loc; 2767 2768 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2769 return -ENOSPC; 2770 2771 down_write(&oi->ip_alloc_sem); 2772 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2773 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2774 if (ret) { 2775 if (ret != -ENOSPC) 2776 mlog_errno(ret); 2777 goto out; 2778 } 2779 } 2780 2781 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2782 xs->not_found ? NULL : xs->here); 2783 ret = ocfs2_xa_set(&loc, xi, ctxt); 2784 if (ret) { 2785 if (ret != -ENOSPC) 2786 mlog_errno(ret); 2787 goto out; 2788 } 2789 xs->here = loc.xl_entry; 2790 2791 out: 2792 up_write(&oi->ip_alloc_sem); 2793 2794 return ret; 2795 } 2796 2797 /* 2798 * ocfs2_xattr_block_find() 2799 * 2800 * Find extended attribute in external block and 2801 * fill search info into struct ocfs2_xattr_search. 
2802 */ 2803 static int ocfs2_xattr_block_find(struct inode *inode, 2804 int name_index, 2805 const char *name, 2806 struct ocfs2_xattr_search *xs) 2807 { 2808 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2809 struct buffer_head *blk_bh = NULL; 2810 struct ocfs2_xattr_block *xb; 2811 int ret = 0; 2812 2813 if (!di->i_xattr_loc) 2814 return ret; 2815 2816 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 2817 &blk_bh); 2818 if (ret < 0) { 2819 mlog_errno(ret); 2820 return ret; 2821 } 2822 2823 xs->xattr_bh = blk_bh; 2824 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2825 2826 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2827 xs->header = &xb->xb_attrs.xb_header; 2828 xs->base = (void *)xs->header; 2829 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; 2830 xs->here = xs->header->xh_entries; 2831 2832 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2833 } else 2834 ret = ocfs2_xattr_index_block_find(inode, blk_bh, 2835 name_index, 2836 name, xs); 2837 2838 if (ret && ret != -ENODATA) { 2839 xs->xattr_bh = NULL; 2840 goto cleanup; 2841 } 2842 xs->not_found = ret; 2843 return 0; 2844 cleanup: 2845 brelse(blk_bh); 2846 2847 return ret; 2848 } 2849 2850 static int ocfs2_create_xattr_block(struct inode *inode, 2851 struct buffer_head *inode_bh, 2852 struct ocfs2_xattr_set_ctxt *ctxt, 2853 int indexed, 2854 struct buffer_head **ret_bh) 2855 { 2856 int ret; 2857 u16 suballoc_bit_start; 2858 u32 num_got; 2859 u64 suballoc_loc, first_blkno; 2860 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; 2861 struct buffer_head *new_bh = NULL; 2862 struct ocfs2_xattr_block *xblk; 2863 2864 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 2865 inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); 2866 if (ret < 0) { 2867 mlog_errno(ret); 2868 goto end; 2869 } 2870 2871 ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1, 2872 &suballoc_loc, &suballoc_bit_start, 2873 &num_got, &first_blkno); 2874 if 
(ret < 0) { 2875 mlog_errno(ret); 2876 goto end; 2877 } 2878 2879 new_bh = sb_getblk(inode->i_sb, first_blkno); 2880 if (!new_bh) { 2881 ret = -ENOMEM; 2882 mlog_errno(ret); 2883 goto end; 2884 } 2885 2886 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); 2887 2888 ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), 2889 new_bh, 2890 OCFS2_JOURNAL_ACCESS_CREATE); 2891 if (ret < 0) { 2892 mlog_errno(ret); 2893 goto end; 2894 } 2895 2896 /* Initialize ocfs2_xattr_block */ 2897 xblk = (struct ocfs2_xattr_block *)new_bh->b_data; 2898 memset(xblk, 0, inode->i_sb->s_blocksize); 2899 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2900 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2901 xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc); 2902 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2903 xblk->xb_fs_generation = 2904 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation); 2905 xblk->xb_blkno = cpu_to_le64(first_blkno); 2906 if (indexed) { 2907 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2908 xr->xt_clusters = cpu_to_le32(1); 2909 xr->xt_last_eb_blk = 0; 2910 xr->xt_list.l_tree_depth = 0; 2911 xr->xt_list.l_count = cpu_to_le16( 2912 ocfs2_xattr_recs_per_xb(inode->i_sb)); 2913 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 2914 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); 2915 } 2916 ocfs2_journal_dirty(ctxt->handle, new_bh); 2917 2918 /* Add it to the inode */ 2919 di->i_xattr_loc = cpu_to_le64(first_blkno); 2920 2921 spin_lock(&OCFS2_I(inode)->ip_lock); 2922 OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 2923 di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); 2924 spin_unlock(&OCFS2_I(inode)->ip_lock); 2925 2926 ocfs2_journal_dirty(ctxt->handle, inode_bh); 2927 2928 *ret_bh = new_bh; 2929 new_bh = NULL; 2930 2931 end: 2932 brelse(new_bh); 2933 return ret; 2934 } 2935 2936 /* 2937 * ocfs2_xattr_block_set() 2938 * 2939 * Set, replace or remove an extended attribute into 
external block.
 *
 */
static int ocfs2_xattr_block_set(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs,
				 struct ocfs2_xattr_set_ctxt *ctxt)
{
	struct ocfs2_xattr_block *xblk;
	struct ocfs2_xa_loc loc;
	int ret;

	if (!xs->xattr_bh) {
		struct buffer_head *created_bh = NULL;

		/* No external block yet: create a flat one and use it. */
		ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
					       0, &created_bh);
		if (ret) {
			mlog_errno(ret);
			return ret;
		}

		xs->xattr_bh = created_bh;
		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
		xs->header = &xblk->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
		xs->here = xs->header->xh_entries;
	} else {
		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
	}

	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
					      xs->not_found ? NULL : xs->here);

		ret = ocfs2_xa_set(&loc, xi, ctxt);
		if (!ret) {
			xs->here = loc.xl_entry;
		} else {
			/* Only a clean -ENOSPC is worth retrying. */
			if ((ret != -ENOSPC) || ctxt->set_abort)
				return ret;

			/* Flat block is full: convert to an indexed tree. */
			ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
			if (ret)
				return ret;
		}
	}

	/*
	 * The flags are read again on purpose: this is reached both for
	 * a block that was indexed all along and for one on which
	 * ocfs2_xattr_create_index_block() has just succeeded.
	 */
	if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
		ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);

	return ret;
}

/* Check whether the new xattr can be inserted into the inode.
*/ 2993 static int ocfs2_xattr_can_be_in_inode(struct inode *inode, 2994 struct ocfs2_xattr_info *xi, 2995 struct ocfs2_xattr_search *xs) 2996 { 2997 struct ocfs2_xattr_entry *last; 2998 int free, i; 2999 size_t min_offs = xs->end - xs->base; 3000 3001 if (!xs->header) 3002 return 0; 3003 3004 last = xs->header->xh_entries; 3005 3006 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 3007 size_t offs = le16_to_cpu(last->xe_name_offset); 3008 if (offs < min_offs) 3009 min_offs = offs; 3010 last += 1; 3011 } 3012 3013 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; 3014 if (free < 0) 3015 return 0; 3016 3017 BUG_ON(!xs->not_found); 3018 3019 if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi))) 3020 return 1; 3021 3022 return 0; 3023 } 3024 3025 static int ocfs2_calc_xattr_set_need(struct inode *inode, 3026 struct ocfs2_dinode *di, 3027 struct ocfs2_xattr_info *xi, 3028 struct ocfs2_xattr_search *xis, 3029 struct ocfs2_xattr_search *xbs, 3030 int *clusters_need, 3031 int *meta_need, 3032 int *credits_need) 3033 { 3034 int ret = 0, old_in_xb = 0; 3035 int clusters_add = 0, meta_add = 0, credits = 0; 3036 struct buffer_head *bh = NULL; 3037 struct ocfs2_xattr_block *xb = NULL; 3038 struct ocfs2_xattr_entry *xe = NULL; 3039 struct ocfs2_xattr_value_root *xv = NULL; 3040 char *base = NULL; 3041 int name_offset, name_len = 0; 3042 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3043 xi->xi_value_len); 3044 u64 value_size; 3045 3046 /* 3047 * Calculate the clusters we need to write. 3048 * No matter whether we replace an old one or add a new one, 3049 * we need this for writing. 
3050 */ 3051 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 3052 credits += new_clusters * 3053 ocfs2_clusters_to_blocks(inode->i_sb, 1); 3054 3055 if (xis->not_found && xbs->not_found) { 3056 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3057 3058 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3059 clusters_add += new_clusters; 3060 credits += ocfs2_calc_extend_credits(inode->i_sb, 3061 &def_xv.xv.xr_list); 3062 } 3063 3064 goto meta_guess; 3065 } 3066 3067 if (!xis->not_found) { 3068 xe = xis->here; 3069 name_offset = le16_to_cpu(xe->xe_name_offset); 3070 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3071 base = xis->base; 3072 credits += OCFS2_INODE_UPDATE_CREDITS; 3073 } else { 3074 int i, block_off = 0; 3075 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3076 xe = xbs->here; 3077 name_offset = le16_to_cpu(xe->xe_name_offset); 3078 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3079 i = xbs->here - xbs->header->xh_entries; 3080 old_in_xb = 1; 3081 3082 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3083 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3084 bucket_xh(xbs->bucket), 3085 i, &block_off, 3086 &name_offset); 3087 base = bucket_block(xbs->bucket, block_off); 3088 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3089 } else { 3090 base = xbs->base; 3091 credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS; 3092 } 3093 } 3094 3095 /* 3096 * delete a xattr doesn't need metadata and cluster allocation. 3097 * so just calculate the credits and return. 3098 * 3099 * The credits for removing the value tree will be extended 3100 * by ocfs2_remove_extent itself. 3101 */ 3102 if (!xi->xi_value) { 3103 if (!ocfs2_xattr_is_local(xe)) 3104 credits += ocfs2_remove_extent_credits(inode->i_sb); 3105 3106 goto out; 3107 } 3108 3109 /* do cluster allocation guess first. 
*/ 3110 value_size = le64_to_cpu(xe->xe_value_size); 3111 3112 if (old_in_xb) { 3113 /* 3114 * In xattr set, we always try to set the xe in inode first, 3115 * so if it can be inserted into inode successfully, the old 3116 * one will be removed from the xattr block, and this xattr 3117 * will be inserted into inode as a new xattr in inode. 3118 */ 3119 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { 3120 clusters_add += new_clusters; 3121 credits += ocfs2_remove_extent_credits(inode->i_sb) + 3122 OCFS2_INODE_UPDATE_CREDITS; 3123 if (!ocfs2_xattr_is_local(xe)) 3124 credits += ocfs2_calc_extend_credits( 3125 inode->i_sb, 3126 &def_xv.xv.xr_list); 3127 goto out; 3128 } 3129 } 3130 3131 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3132 /* the new values will be stored outside. */ 3133 u32 old_clusters = 0; 3134 3135 if (!ocfs2_xattr_is_local(xe)) { 3136 old_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3137 value_size); 3138 xv = (struct ocfs2_xattr_value_root *) 3139 (base + name_offset + name_len); 3140 value_size = OCFS2_XATTR_ROOT_SIZE; 3141 } else 3142 xv = &def_xv.xv; 3143 3144 if (old_clusters >= new_clusters) { 3145 credits += ocfs2_remove_extent_credits(inode->i_sb); 3146 goto out; 3147 } else { 3148 meta_add += ocfs2_extend_meta_needed(&xv->xr_list); 3149 clusters_add += new_clusters - old_clusters; 3150 credits += ocfs2_calc_extend_credits(inode->i_sb, 3151 &xv->xr_list); 3152 if (value_size >= OCFS2_XATTR_ROOT_SIZE) 3153 goto out; 3154 } 3155 } else { 3156 /* 3157 * Now the new value will be stored inside. So if the new 3158 * value is smaller than the size of value root or the old 3159 * value, we don't need any allocation, otherwise we have 3160 * to guess metadata allocation. 3161 */ 3162 if ((ocfs2_xattr_is_local(xe) && 3163 (value_size >= xi->xi_value_len)) || 3164 (!ocfs2_xattr_is_local(xe) && 3165 OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len)) 3166 goto out; 3167 } 3168 3169 meta_guess: 3170 /* calculate metadata allocation. 
*/ 3171 if (di->i_xattr_loc) { 3172 if (!xbs->xattr_bh) { 3173 ret = ocfs2_read_xattr_block(inode, 3174 le64_to_cpu(di->i_xattr_loc), 3175 &bh); 3176 if (ret) { 3177 mlog_errno(ret); 3178 goto out; 3179 } 3180 3181 xb = (struct ocfs2_xattr_block *)bh->b_data; 3182 } else 3183 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3184 3185 /* 3186 * If there is already an xattr tree, good, we can calculate 3187 * like other b-trees. Otherwise we may have the chance of 3188 * create a tree, the credit calculation is borrowed from 3189 * ocfs2_calc_extend_credits with root_el = NULL. And the 3190 * new tree will be cluster based, so no meta is needed. 3191 */ 3192 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3193 struct ocfs2_extent_list *el = 3194 &xb->xb_attrs.xb_root.xt_list; 3195 meta_add += ocfs2_extend_meta_needed(el); 3196 credits += ocfs2_calc_extend_credits(inode->i_sb, 3197 el); 3198 } else 3199 credits += OCFS2_SUBALLOC_ALLOC + 1; 3200 3201 /* 3202 * This cluster will be used either for new bucket or for 3203 * new xattr block. 3204 * If the cluster size is the same as the bucket size, one 3205 * more is needed since we may need to extend the bucket 3206 * also. 
3207 */ 3208 clusters_add += 1; 3209 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3210 if (OCFS2_XATTR_BUCKET_SIZE == 3211 OCFS2_SB(inode->i_sb)->s_clustersize) { 3212 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3213 clusters_add += 1; 3214 } 3215 } else { 3216 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 3217 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3218 struct ocfs2_extent_list *el = &def_xv.xv.xr_list; 3219 meta_add += ocfs2_extend_meta_needed(el); 3220 credits += ocfs2_calc_extend_credits(inode->i_sb, 3221 el); 3222 } else { 3223 meta_add += 1; 3224 } 3225 } 3226 out: 3227 if (clusters_need) 3228 *clusters_need = clusters_add; 3229 if (meta_need) 3230 *meta_need = meta_add; 3231 if (credits_need) 3232 *credits_need = credits; 3233 brelse(bh); 3234 return ret; 3235 } 3236 3237 static int ocfs2_init_xattr_set_ctxt(struct inode *inode, 3238 struct ocfs2_dinode *di, 3239 struct ocfs2_xattr_info *xi, 3240 struct ocfs2_xattr_search *xis, 3241 struct ocfs2_xattr_search *xbs, 3242 struct ocfs2_xattr_set_ctxt *ctxt, 3243 int extra_meta, 3244 int *credits) 3245 { 3246 int clusters_add, meta_add, ret; 3247 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3248 3249 memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt)); 3250 3251 ocfs2_init_dealloc_ctxt(&ctxt->dealloc); 3252 3253 ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, 3254 &clusters_add, &meta_add, credits); 3255 if (ret) { 3256 mlog_errno(ret); 3257 return ret; 3258 } 3259 3260 meta_add += extra_meta; 3261 trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add, 3262 clusters_add, *credits); 3263 3264 if (meta_add) { 3265 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, 3266 &ctxt->meta_ac); 3267 if (ret) { 3268 mlog_errno(ret); 3269 goto out; 3270 } 3271 } 3272 3273 if (clusters_add) { 3274 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac); 3275 if (ret) 3276 mlog_errno(ret); 3277 } 3278 out: 3279 if (ret) { 3280 if (ctxt->meta_ac) { 3281 
ocfs2_free_alloc_context(ctxt->meta_ac); 3282 ctxt->meta_ac = NULL; 3283 } 3284 3285 /* 3286 * We cannot have an error and a non null ctxt->data_ac. 3287 */ 3288 } 3289 3290 return ret; 3291 } 3292 3293 static int __ocfs2_xattr_set_handle(struct inode *inode, 3294 struct ocfs2_dinode *di, 3295 struct ocfs2_xattr_info *xi, 3296 struct ocfs2_xattr_search *xis, 3297 struct ocfs2_xattr_search *xbs, 3298 struct ocfs2_xattr_set_ctxt *ctxt) 3299 { 3300 int ret = 0, credits, old_found; 3301 3302 if (!xi->xi_value) { 3303 /* Remove existing extended attribute */ 3304 if (!xis->not_found) 3305 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3306 else if (!xbs->not_found) 3307 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3308 } else { 3309 /* We always try to set extended attribute into inode first*/ 3310 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3311 if (!ret && !xbs->not_found) { 3312 /* 3313 * If succeed and that extended attribute existing in 3314 * external block, then we will remove it. 
3315 */ 3316 xi->xi_value = NULL; 3317 xi->xi_value_len = 0; 3318 3319 old_found = xis->not_found; 3320 xis->not_found = -ENODATA; 3321 ret = ocfs2_calc_xattr_set_need(inode, 3322 di, 3323 xi, 3324 xis, 3325 xbs, 3326 NULL, 3327 NULL, 3328 &credits); 3329 xis->not_found = old_found; 3330 if (ret) { 3331 mlog_errno(ret); 3332 goto out; 3333 } 3334 3335 ret = ocfs2_extend_trans(ctxt->handle, credits); 3336 if (ret) { 3337 mlog_errno(ret); 3338 goto out; 3339 } 3340 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3341 } else if ((ret == -ENOSPC) && !ctxt->set_abort) { 3342 if (di->i_xattr_loc && !xbs->xattr_bh) { 3343 ret = ocfs2_xattr_block_find(inode, 3344 xi->xi_name_index, 3345 xi->xi_name, xbs); 3346 if (ret) 3347 goto out; 3348 3349 old_found = xis->not_found; 3350 xis->not_found = -ENODATA; 3351 ret = ocfs2_calc_xattr_set_need(inode, 3352 di, 3353 xi, 3354 xis, 3355 xbs, 3356 NULL, 3357 NULL, 3358 &credits); 3359 xis->not_found = old_found; 3360 if (ret) { 3361 mlog_errno(ret); 3362 goto out; 3363 } 3364 3365 ret = ocfs2_extend_trans(ctxt->handle, credits); 3366 if (ret) { 3367 mlog_errno(ret); 3368 goto out; 3369 } 3370 } 3371 /* 3372 * If no space in inode, we will set extended attribute 3373 * into external block. 3374 */ 3375 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3376 if (ret) 3377 goto out; 3378 if (!xis->not_found) { 3379 /* 3380 * If succeed and that extended attribute 3381 * existing in inode, we will remove it. 3382 */ 3383 xi->xi_value = NULL; 3384 xi->xi_value_len = 0; 3385 xbs->not_found = -ENODATA; 3386 ret = ocfs2_calc_xattr_set_need(inode, 3387 di, 3388 xi, 3389 xis, 3390 xbs, 3391 NULL, 3392 NULL, 3393 &credits); 3394 if (ret) { 3395 mlog_errno(ret); 3396 goto out; 3397 } 3398 3399 ret = ocfs2_extend_trans(ctxt->handle, credits); 3400 if (ret) { 3401 mlog_errno(ret); 3402 goto out; 3403 } 3404 ret = ocfs2_xattr_ibody_set(inode, xi, 3405 xis, ctxt); 3406 } 3407 } 3408 } 3409 3410 if (!ret) { 3411 /* Update inode ctime. 
*/ 3412 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 3413 xis->inode_bh, 3414 OCFS2_JOURNAL_ACCESS_WRITE); 3415 if (ret) { 3416 mlog_errno(ret); 3417 goto out; 3418 } 3419 3420 inode->i_ctime = current_time(inode); 3421 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 3422 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 3423 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh); 3424 } 3425 out: 3426 return ret; 3427 } 3428 3429 /* 3430 * This function only called duing creating inode 3431 * for init security/acl xattrs of the new inode. 3432 * All transanction credits have been reserved in mknod. 3433 */ 3434 int ocfs2_xattr_set_handle(handle_t *handle, 3435 struct inode *inode, 3436 struct buffer_head *di_bh, 3437 int name_index, 3438 const char *name, 3439 const void *value, 3440 size_t value_len, 3441 int flags, 3442 struct ocfs2_alloc_context *meta_ac, 3443 struct ocfs2_alloc_context *data_ac) 3444 { 3445 struct ocfs2_dinode *di; 3446 int ret; 3447 3448 struct ocfs2_xattr_info xi = { 3449 .xi_name_index = name_index, 3450 .xi_name = name, 3451 .xi_name_len = strlen(name), 3452 .xi_value = value, 3453 .xi_value_len = value_len, 3454 }; 3455 3456 struct ocfs2_xattr_search xis = { 3457 .not_found = -ENODATA, 3458 }; 3459 3460 struct ocfs2_xattr_search xbs = { 3461 .not_found = -ENODATA, 3462 }; 3463 3464 struct ocfs2_xattr_set_ctxt ctxt = { 3465 .handle = handle, 3466 .meta_ac = meta_ac, 3467 .data_ac = data_ac, 3468 }; 3469 3470 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3471 return -EOPNOTSUPP; 3472 3473 /* 3474 * In extreme situation, may need xattr bucket when 3475 * block size is too small. And we have already reserved 3476 * the credits for bucket in mknod. 
3477 */ 3478 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) { 3479 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3480 if (!xbs.bucket) { 3481 mlog_errno(-ENOMEM); 3482 return -ENOMEM; 3483 } 3484 } 3485 3486 xis.inode_bh = xbs.inode_bh = di_bh; 3487 di = (struct ocfs2_dinode *)di_bh->b_data; 3488 3489 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3490 3491 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3492 if (ret) 3493 goto cleanup; 3494 if (xis.not_found) { 3495 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3496 if (ret) 3497 goto cleanup; 3498 } 3499 3500 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3501 3502 cleanup: 3503 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3504 brelse(xbs.xattr_bh); 3505 ocfs2_xattr_bucket_free(xbs.bucket); 3506 3507 return ret; 3508 } 3509 3510 /* 3511 * ocfs2_xattr_set() 3512 * 3513 * Set, replace or remove an extended attribute for this inode. 3514 * value is NULL to remove an existing extended attribute, else either 3515 * create or replace an extended attribute. 
3516 */ 3517 int ocfs2_xattr_set(struct inode *inode, 3518 int name_index, 3519 const char *name, 3520 const void *value, 3521 size_t value_len, 3522 int flags) 3523 { 3524 struct buffer_head *di_bh = NULL; 3525 struct ocfs2_dinode *di; 3526 int ret, credits, had_lock, ref_meta = 0, ref_credits = 0; 3527 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3528 struct inode *tl_inode = osb->osb_tl_inode; 3529 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; 3530 struct ocfs2_refcount_tree *ref_tree = NULL; 3531 struct ocfs2_lock_holder oh; 3532 3533 struct ocfs2_xattr_info xi = { 3534 .xi_name_index = name_index, 3535 .xi_name = name, 3536 .xi_name_len = strlen(name), 3537 .xi_value = value, 3538 .xi_value_len = value_len, 3539 }; 3540 3541 struct ocfs2_xattr_search xis = { 3542 .not_found = -ENODATA, 3543 }; 3544 3545 struct ocfs2_xattr_search xbs = { 3546 .not_found = -ENODATA, 3547 }; 3548 3549 if (!ocfs2_supports_xattr(osb)) 3550 return -EOPNOTSUPP; 3551 3552 /* 3553 * Only xbs will be used on indexed trees. xis doesn't need a 3554 * bucket. 3555 */ 3556 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3557 if (!xbs.bucket) { 3558 mlog_errno(-ENOMEM); 3559 return -ENOMEM; 3560 } 3561 3562 had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh); 3563 if (had_lock < 0) { 3564 ret = had_lock; 3565 mlog_errno(ret); 3566 goto cleanup_nolock; 3567 } 3568 xis.inode_bh = xbs.inode_bh = di_bh; 3569 di = (struct ocfs2_dinode *)di_bh->b_data; 3570 3571 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3572 /* 3573 * Scan inode and external block to find the same name 3574 * extended attribute and collect search information. 
3575 */ 3576 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3577 if (ret) 3578 goto cleanup; 3579 if (xis.not_found) { 3580 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3581 if (ret) 3582 goto cleanup; 3583 } 3584 3585 if (xis.not_found && xbs.not_found) { 3586 ret = -ENODATA; 3587 if (flags & XATTR_REPLACE) 3588 goto cleanup; 3589 ret = 0; 3590 if (!value) 3591 goto cleanup; 3592 } else { 3593 ret = -EEXIST; 3594 if (flags & XATTR_CREATE) 3595 goto cleanup; 3596 } 3597 3598 /* Check whether the value is refcounted and do some preparation. */ 3599 if (ocfs2_is_refcount_inode(inode) && 3600 (!xis.not_found || !xbs.not_found)) { 3601 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3602 &xis, &xbs, &ref_tree, 3603 &ref_meta, &ref_credits); 3604 if (ret) { 3605 mlog_errno(ret); 3606 goto cleanup; 3607 } 3608 } 3609 3610 inode_lock(tl_inode); 3611 3612 if (ocfs2_truncate_log_needs_flush(osb)) { 3613 ret = __ocfs2_flush_truncate_log(osb); 3614 if (ret < 0) { 3615 inode_unlock(tl_inode); 3616 mlog_errno(ret); 3617 goto cleanup; 3618 } 3619 } 3620 inode_unlock(tl_inode); 3621 3622 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3623 &xbs, &ctxt, ref_meta, &credits); 3624 if (ret) { 3625 mlog_errno(ret); 3626 goto cleanup; 3627 } 3628 3629 /* we need to update inode's ctime field, so add credit for it. 
*/ 3630 credits += OCFS2_INODE_UPDATE_CREDITS; 3631 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 3632 if (IS_ERR(ctxt.handle)) { 3633 ret = PTR_ERR(ctxt.handle); 3634 mlog_errno(ret); 3635 goto out_free_ac; 3636 } 3637 3638 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3639 ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0); 3640 3641 ocfs2_commit_trans(osb, ctxt.handle); 3642 3643 out_free_ac: 3644 if (ctxt.data_ac) 3645 ocfs2_free_alloc_context(ctxt.data_ac); 3646 if (ctxt.meta_ac) 3647 ocfs2_free_alloc_context(ctxt.meta_ac); 3648 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3649 ocfs2_schedule_truncate_log_flush(osb, 1); 3650 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3651 3652 cleanup: 3653 if (ref_tree) 3654 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3655 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3656 if (!value && !ret) { 3657 ret = ocfs2_try_remove_refcount_tree(inode, di_bh); 3658 if (ret) 3659 mlog_errno(ret); 3660 } 3661 ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); 3662 cleanup_nolock: 3663 brelse(di_bh); 3664 brelse(xbs.xattr_bh); 3665 ocfs2_xattr_bucket_free(xbs.bucket); 3666 3667 return ret; 3668 } 3669 3670 /* 3671 * Find the xattr extent rec which may contains name_hash. 3672 * e_cpos will be the first name hash of the xattr rec. 3673 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. 
3674 */ 3675 static int ocfs2_xattr_get_rec(struct inode *inode, 3676 u32 name_hash, 3677 u64 *p_blkno, 3678 u32 *e_cpos, 3679 u32 *num_clusters, 3680 struct ocfs2_extent_list *el) 3681 { 3682 int ret = 0, i; 3683 struct buffer_head *eb_bh = NULL; 3684 struct ocfs2_extent_block *eb; 3685 struct ocfs2_extent_rec *rec = NULL; 3686 u64 e_blkno = 0; 3687 3688 if (el->l_tree_depth) { 3689 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3690 &eb_bh); 3691 if (ret) { 3692 mlog_errno(ret); 3693 goto out; 3694 } 3695 3696 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3697 el = &eb->h_list; 3698 3699 if (el->l_tree_depth) { 3700 ret = ocfs2_error(inode->i_sb, 3701 "Inode %lu has non zero tree depth in xattr tree block %llu\n", 3702 inode->i_ino, 3703 (unsigned long long)eb_bh->b_blocknr); 3704 goto out; 3705 } 3706 } 3707 3708 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3709 rec = &el->l_recs[i]; 3710 3711 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3712 e_blkno = le64_to_cpu(rec->e_blkno); 3713 break; 3714 } 3715 } 3716 3717 if (!e_blkno) { 3718 ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n", 3719 inode->i_ino, 3720 le32_to_cpu(rec->e_cpos), 3721 ocfs2_rec_clusters(el, rec)); 3722 goto out; 3723 } 3724 3725 *p_blkno = le64_to_cpu(rec->e_blkno); 3726 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3727 if (e_cpos) 3728 *e_cpos = le32_to_cpu(rec->e_cpos); 3729 out: 3730 brelse(eb_bh); 3731 return ret; 3732 } 3733 3734 typedef int (xattr_bucket_func)(struct inode *inode, 3735 struct ocfs2_xattr_bucket *bucket, 3736 void *para); 3737 3738 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3739 struct ocfs2_xattr_bucket *bucket, 3740 int name_index, 3741 const char *name, 3742 u32 name_hash, 3743 u16 *xe_index, 3744 int *found) 3745 { 3746 int i, ret = 0, cmp = 1, block_off, new_offset; 3747 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3748 size_t name_len = strlen(name); 3749 struct 
ocfs2_xattr_entry *xe = NULL; 3750 char *xe_name; 3751 3752 /* 3753 * We don't use binary search in the bucket because there 3754 * may be multiple entries with the same name hash. 3755 */ 3756 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3757 xe = &xh->xh_entries[i]; 3758 3759 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3760 continue; 3761 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3762 break; 3763 3764 cmp = name_index - ocfs2_xattr_get_type(xe); 3765 if (!cmp) 3766 cmp = name_len - xe->xe_name_len; 3767 if (cmp) 3768 continue; 3769 3770 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3771 xh, 3772 i, 3773 &block_off, 3774 &new_offset); 3775 if (ret) { 3776 mlog_errno(ret); 3777 break; 3778 } 3779 3780 3781 xe_name = bucket_block(bucket, block_off) + new_offset; 3782 if (!memcmp(name, xe_name, name_len)) { 3783 *xe_index = i; 3784 *found = 1; 3785 ret = 0; 3786 break; 3787 } 3788 } 3789 3790 return ret; 3791 } 3792 3793 /* 3794 * Find the specified xattr entry in a series of buckets. 3795 * This series start from p_blkno and last for num_clusters. 3796 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3797 * the num of the valid buckets. 3798 * 3799 * Return the buffer_head this xattr should reside in. And if the xattr's 3800 * hash is in the gap of 2 buckets, return the lower bucket. 
3801 */ 3802 static int ocfs2_xattr_bucket_find(struct inode *inode, 3803 int name_index, 3804 const char *name, 3805 u32 name_hash, 3806 u64 p_blkno, 3807 u32 first_hash, 3808 u32 num_clusters, 3809 struct ocfs2_xattr_search *xs) 3810 { 3811 int ret, found = 0; 3812 struct ocfs2_xattr_header *xh = NULL; 3813 struct ocfs2_xattr_entry *xe = NULL; 3814 u16 index = 0; 3815 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3816 int low_bucket = 0, bucket, high_bucket; 3817 struct ocfs2_xattr_bucket *search; 3818 u64 blkno, lower_blkno = 0; 3819 3820 search = ocfs2_xattr_bucket_new(inode); 3821 if (!search) { 3822 ret = -ENOMEM; 3823 mlog_errno(ret); 3824 goto out; 3825 } 3826 3827 ret = ocfs2_read_xattr_bucket(search, p_blkno); 3828 if (ret) { 3829 mlog_errno(ret); 3830 goto out; 3831 } 3832 3833 xh = bucket_xh(search); 3834 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 3835 while (low_bucket <= high_bucket) { 3836 ocfs2_xattr_bucket_relse(search); 3837 3838 bucket = (low_bucket + high_bucket) / 2; 3839 blkno = p_blkno + bucket * blk_per_bucket; 3840 ret = ocfs2_read_xattr_bucket(search, blkno); 3841 if (ret) { 3842 mlog_errno(ret); 3843 goto out; 3844 } 3845 3846 xh = bucket_xh(search); 3847 xe = &xh->xh_entries[0]; 3848 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3849 high_bucket = bucket - 1; 3850 continue; 3851 } 3852 3853 /* 3854 * Check whether the hash of the last entry in our 3855 * bucket is larger than the search one. for an empty 3856 * bucket, the last one is also the first one. 3857 */ 3858 if (xh->xh_count) 3859 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; 3860 3861 /* record lower_blkno which may be the insert place. */ 3862 lower_blkno = blkno; 3863 3864 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3865 low_bucket = bucket + 1; 3866 continue; 3867 } 3868 3869 /* the searched xattr should reside in this bucket if exists. 
*/ 3870 ret = ocfs2_find_xe_in_bucket(inode, search, 3871 name_index, name, name_hash, 3872 &index, &found); 3873 if (ret) { 3874 mlog_errno(ret); 3875 goto out; 3876 } 3877 break; 3878 } 3879 3880 /* 3881 * Record the bucket we have found. 3882 * When the xattr's hash value is in the gap of 2 buckets, we will 3883 * always set it to the previous bucket. 3884 */ 3885 if (!lower_blkno) 3886 lower_blkno = p_blkno; 3887 3888 /* This should be in cache - we just read it during the search */ 3889 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); 3890 if (ret) { 3891 mlog_errno(ret); 3892 goto out; 3893 } 3894 3895 xs->header = bucket_xh(xs->bucket); 3896 xs->base = bucket_block(xs->bucket, 0); 3897 xs->end = xs->base + inode->i_sb->s_blocksize; 3898 3899 if (found) { 3900 xs->here = &xs->header->xh_entries[index]; 3901 trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno, 3902 name, name_index, name_hash, 3903 (unsigned long long)bucket_blkno(xs->bucket), 3904 index); 3905 } else 3906 ret = -ENODATA; 3907 3908 out: 3909 ocfs2_xattr_bucket_free(search); 3910 return ret; 3911 } 3912 3913 static int ocfs2_xattr_index_block_find(struct inode *inode, 3914 struct buffer_head *root_bh, 3915 int name_index, 3916 const char *name, 3917 struct ocfs2_xattr_search *xs) 3918 { 3919 int ret; 3920 struct ocfs2_xattr_block *xb = 3921 (struct ocfs2_xattr_block *)root_bh->b_data; 3922 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 3923 struct ocfs2_extent_list *el = &xb_root->xt_list; 3924 u64 p_blkno = 0; 3925 u32 first_hash, num_clusters = 0; 3926 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 3927 3928 if (le16_to_cpu(el->l_next_free_rec) == 0) 3929 return -ENODATA; 3930 3931 trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno, 3932 name, name_index, name_hash, 3933 (unsigned long long)root_bh->b_blocknr, 3934 -1); 3935 3936 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3937 &num_clusters, el); 3938 if (ret) { 
3939 mlog_errno(ret); 3940 goto out; 3941 } 3942 3943 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 3944 3945 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno, 3946 name, name_index, first_hash, 3947 (unsigned long long)p_blkno, 3948 num_clusters); 3949 3950 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3951 p_blkno, first_hash, num_clusters, xs); 3952 3953 out: 3954 return ret; 3955 } 3956 3957 static int ocfs2_iterate_xattr_buckets(struct inode *inode, 3958 u64 blkno, 3959 u32 clusters, 3960 xattr_bucket_func *func, 3961 void *para) 3962 { 3963 int i, ret = 0; 3964 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3965 u32 num_buckets = clusters * bpc; 3966 struct ocfs2_xattr_bucket *bucket; 3967 3968 bucket = ocfs2_xattr_bucket_new(inode); 3969 if (!bucket) { 3970 mlog_errno(-ENOMEM); 3971 return -ENOMEM; 3972 } 3973 3974 trace_ocfs2_iterate_xattr_buckets( 3975 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3976 (unsigned long long)blkno, clusters); 3977 3978 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 3979 ret = ocfs2_read_xattr_bucket(bucket, blkno); 3980 if (ret) { 3981 mlog_errno(ret); 3982 break; 3983 } 3984 3985 /* 3986 * The real bucket num in this series of blocks is stored 3987 * in the 1st bucket. 
3988 */ 3989 if (i == 0) 3990 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 3991 3992 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno, 3993 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 3994 if (func) { 3995 ret = func(inode, bucket, para); 3996 if (ret && ret != -ERANGE) 3997 mlog_errno(ret); 3998 /* Fall through to bucket_relse() */ 3999 } 4000 4001 ocfs2_xattr_bucket_relse(bucket); 4002 if (ret) 4003 break; 4004 } 4005 4006 ocfs2_xattr_bucket_free(bucket); 4007 return ret; 4008 } 4009 4010 struct ocfs2_xattr_tree_list { 4011 char *buffer; 4012 size_t buffer_size; 4013 size_t result; 4014 }; 4015 4016 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 4017 struct ocfs2_xattr_header *xh, 4018 int index, 4019 int *block_off, 4020 int *new_offset) 4021 { 4022 u16 name_offset; 4023 4024 if (index < 0 || index >= le16_to_cpu(xh->xh_count)) 4025 return -EINVAL; 4026 4027 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 4028 4029 *block_off = name_offset >> sb->s_blocksize_bits; 4030 *new_offset = name_offset % sb->s_blocksize; 4031 4032 return 0; 4033 } 4034 4035 static int ocfs2_list_xattr_bucket(struct inode *inode, 4036 struct ocfs2_xattr_bucket *bucket, 4037 void *para) 4038 { 4039 int ret = 0, type; 4040 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; 4041 int i, block_off, new_offset; 4042 const char *name; 4043 4044 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { 4045 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; 4046 type = ocfs2_xattr_get_type(entry); 4047 4048 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 4049 bucket_xh(bucket), 4050 i, 4051 &block_off, 4052 &new_offset); 4053 if (ret) 4054 break; 4055 4056 name = (const char *)bucket_block(bucket, block_off) + 4057 new_offset; 4058 ret = ocfs2_xattr_list_entry(inode->i_sb, 4059 xl->buffer, 4060 xl->buffer_size, 4061 &xl->result, 4062 type, name, 4063 
entry->xe_name_len); 4064 if (ret) 4065 break; 4066 } 4067 4068 return ret; 4069 } 4070 4071 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 4072 struct buffer_head *blk_bh, 4073 xattr_tree_rec_func *rec_func, 4074 void *para) 4075 { 4076 struct ocfs2_xattr_block *xb = 4077 (struct ocfs2_xattr_block *)blk_bh->b_data; 4078 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; 4079 int ret = 0; 4080 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 4081 u64 p_blkno = 0; 4082 4083 if (!el->l_next_free_rec || !rec_func) 4084 return 0; 4085 4086 while (name_hash > 0) { 4087 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 4088 &e_cpos, &num_clusters, el); 4089 if (ret) { 4090 mlog_errno(ret); 4091 break; 4092 } 4093 4094 ret = rec_func(inode, blk_bh, p_blkno, e_cpos, 4095 num_clusters, para); 4096 if (ret) { 4097 if (ret != -ERANGE) 4098 mlog_errno(ret); 4099 break; 4100 } 4101 4102 if (e_cpos == 0) 4103 break; 4104 4105 name_hash = e_cpos - 1; 4106 } 4107 4108 return ret; 4109 4110 } 4111 4112 static int ocfs2_list_xattr_tree_rec(struct inode *inode, 4113 struct buffer_head *root_bh, 4114 u64 blkno, u32 cpos, u32 len, void *para) 4115 { 4116 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 4117 ocfs2_list_xattr_bucket, para); 4118 } 4119 4120 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 4121 struct buffer_head *blk_bh, 4122 char *buffer, 4123 size_t buffer_size) 4124 { 4125 int ret; 4126 struct ocfs2_xattr_tree_list xl = { 4127 .buffer = buffer, 4128 .buffer_size = buffer_size, 4129 .result = 0, 4130 }; 4131 4132 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 4133 ocfs2_list_xattr_tree_rec, &xl); 4134 if (ret) { 4135 mlog_errno(ret); 4136 goto out; 4137 } 4138 4139 ret = xl.result; 4140 out: 4141 return ret; 4142 } 4143 4144 static int cmp_xe(const void *a, const void *b) 4145 { 4146 const struct ocfs2_xattr_entry *l = a, *r = b; 4147 u32 l_hash = le32_to_cpu(l->xe_name_hash); 4148 u32 r_hash = 
le32_to_cpu(r->xe_name_hash); 4149 4150 if (l_hash > r_hash) 4151 return 1; 4152 if (l_hash < r_hash) 4153 return -1; 4154 return 0; 4155 } 4156 4157 static void swap_xe(void *a, void *b, int size) 4158 { 4159 struct ocfs2_xattr_entry *l = a, *r = b, tmp; 4160 4161 tmp = *l; 4162 memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); 4163 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); 4164 } 4165 4166 /* 4167 * When the ocfs2_xattr_block is filled up, new bucket will be created 4168 * and all the xattr entries will be moved to the new bucket. 4169 * The header goes at the start of the bucket, and the names+values are 4170 * filled from the end. This is why *target starts as the last buffer. 4171 * Note: we need to sort the entries since they are not saved in order 4172 * in the ocfs2_xattr_block. 4173 */ 4174 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 4175 struct buffer_head *xb_bh, 4176 struct ocfs2_xattr_bucket *bucket) 4177 { 4178 int i, blocksize = inode->i_sb->s_blocksize; 4179 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4180 u16 offset, size, off_change; 4181 struct ocfs2_xattr_entry *xe; 4182 struct ocfs2_xattr_block *xb = 4183 (struct ocfs2_xattr_block *)xb_bh->b_data; 4184 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 4185 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 4186 u16 count = le16_to_cpu(xb_xh->xh_count); 4187 char *src = xb_bh->b_data; 4188 char *target = bucket_block(bucket, blks - 1); 4189 4190 trace_ocfs2_cp_xattr_block_to_bucket_begin( 4191 (unsigned long long)xb_bh->b_blocknr, 4192 (unsigned long long)bucket_blkno(bucket)); 4193 4194 for (i = 0; i < blks; i++) 4195 memset(bucket_block(bucket, i), 0, blocksize); 4196 4197 /* 4198 * Since the xe_name_offset is based on ocfs2_xattr_header, 4199 * there is a offset change corresponding to the change of 4200 * ocfs2_xattr_header's position. 
4201 */ 4202 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4203 xe = &xb_xh->xh_entries[count - 1]; 4204 offset = le16_to_cpu(xe->xe_name_offset) + off_change; 4205 size = blocksize - offset; 4206 4207 /* copy all the names and values. */ 4208 memcpy(target + offset, src + offset, size); 4209 4210 /* Init new header now. */ 4211 xh->xh_count = xb_xh->xh_count; 4212 xh->xh_num_buckets = cpu_to_le16(1); 4213 xh->xh_name_value_len = cpu_to_le16(size); 4214 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 4215 4216 /* copy all the entries. */ 4217 target = bucket_block(bucket, 0); 4218 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 4219 size = count * sizeof(struct ocfs2_xattr_entry); 4220 memcpy(target + offset, (char *)xb_xh + offset, size); 4221 4222 /* Change the xe offset for all the xe because of the move. */ 4223 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + 4224 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4225 for (i = 0; i < count; i++) 4226 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4227 4228 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); 4229 4230 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4231 cmp_xe, swap_xe); 4232 } 4233 4234 /* 4235 * After we move xattr from block to index btree, we have to 4236 * update ocfs2_xattr_search to the new xe and base. 4237 * 4238 * When the entry is in xattr block, xattr_bh indicates the storage place. 4239 * While if the entry is in index b-tree, "bucket" indicates the 4240 * real place of the xattr. 
4241 */ 4242 static void ocfs2_xattr_update_xattr_search(struct inode *inode, 4243 struct ocfs2_xattr_search *xs, 4244 struct buffer_head *old_bh) 4245 { 4246 char *buf = old_bh->b_data; 4247 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 4248 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 4249 int i; 4250 4251 xs->header = bucket_xh(xs->bucket); 4252 xs->base = bucket_block(xs->bucket, 0); 4253 xs->end = xs->base + inode->i_sb->s_blocksize; 4254 4255 if (xs->not_found) 4256 return; 4257 4258 i = xs->here - old_xh->xh_entries; 4259 xs->here = &xs->header->xh_entries[i]; 4260 } 4261 4262 static int ocfs2_xattr_create_index_block(struct inode *inode, 4263 struct ocfs2_xattr_search *xs, 4264 struct ocfs2_xattr_set_ctxt *ctxt) 4265 { 4266 int ret; 4267 u32 bit_off, len; 4268 u64 blkno; 4269 handle_t *handle = ctxt->handle; 4270 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4271 struct buffer_head *xb_bh = xs->xattr_bh; 4272 struct ocfs2_xattr_block *xb = 4273 (struct ocfs2_xattr_block *)xb_bh->b_data; 4274 struct ocfs2_xattr_tree_root *xr; 4275 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4276 4277 trace_ocfs2_xattr_create_index_block_begin( 4278 (unsigned long long)xb_bh->b_blocknr); 4279 4280 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4281 BUG_ON(!xs->bucket); 4282 4283 /* 4284 * XXX: 4285 * We can use this lock for now, and maybe move to a dedicated mutex 4286 * if performance becomes a problem later. 
4287 */ 4288 down_write(&oi->ip_alloc_sem); 4289 4290 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, 4291 OCFS2_JOURNAL_ACCESS_WRITE); 4292 if (ret) { 4293 mlog_errno(ret); 4294 goto out; 4295 } 4296 4297 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 4298 1, 1, &bit_off, &len); 4299 if (ret) { 4300 mlog_errno(ret); 4301 goto out; 4302 } 4303 4304 /* 4305 * The bucket may spread in many blocks, and 4306 * we will only touch the 1st block and the last block 4307 * in the whole bucket(one for entry and one for data). 4308 */ 4309 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4310 4311 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); 4312 4313 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1); 4314 if (ret) { 4315 mlog_errno(ret); 4316 goto out; 4317 } 4318 4319 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 4320 OCFS2_JOURNAL_ACCESS_CREATE); 4321 if (ret) { 4322 mlog_errno(ret); 4323 goto out; 4324 } 4325 4326 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); 4327 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 4328 4329 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); 4330 4331 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 4332 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 4333 offsetof(struct ocfs2_xattr_block, xb_attrs)); 4334 4335 xr = &xb->xb_attrs.xb_root; 4336 xr->xt_clusters = cpu_to_le32(1); 4337 xr->xt_last_eb_blk = 0; 4338 xr->xt_list.l_tree_depth = 0; 4339 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); 4340 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 4341 4342 xr->xt_list.l_recs[0].e_cpos = 0; 4343 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 4344 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 4345 4346 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 4347 4348 ocfs2_journal_dirty(handle, xb_bh); 4349 4350 out: 4351 up_write(&oi->ip_alloc_sem); 4352 4353 return ret; 4354 } 4355 4356 static int 
cmp_xe_offset(const void *a, const void *b) 4357 { 4358 const struct ocfs2_xattr_entry *l = a, *r = b; 4359 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4360 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4361 4362 if (l_name_offset < r_name_offset) 4363 return 1; 4364 if (l_name_offset > r_name_offset) 4365 return -1; 4366 return 0; 4367 } 4368 4369 /* 4370 * defrag a xattr bucket if we find that the bucket has some 4371 * holes beteen name/value pairs. 4372 * We will move all the name/value pairs to the end of the bucket 4373 * so that we can spare some space for insertion. 4374 */ 4375 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4376 handle_t *handle, 4377 struct ocfs2_xattr_bucket *bucket) 4378 { 4379 int ret, i; 4380 size_t end, offset, len; 4381 struct ocfs2_xattr_header *xh; 4382 char *entries, *buf, *bucket_buf = NULL; 4383 u64 blkno = bucket_blkno(bucket); 4384 u16 xh_free_start; 4385 size_t blocksize = inode->i_sb->s_blocksize; 4386 struct ocfs2_xattr_entry *xe; 4387 4388 /* 4389 * In order to make the operation more efficient and generic, 4390 * we copy all the blocks into a contiguous memory and do the 4391 * defragment there, so if anything is error, we will not touch 4392 * the real block. 
4393 */ 4394 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4395 if (!bucket_buf) { 4396 ret = -EIO; 4397 goto out; 4398 } 4399 4400 buf = bucket_buf; 4401 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4402 memcpy(buf, bucket_block(bucket, i), blocksize); 4403 4404 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4405 OCFS2_JOURNAL_ACCESS_WRITE); 4406 if (ret < 0) { 4407 mlog_errno(ret); 4408 goto out; 4409 } 4410 4411 xh = (struct ocfs2_xattr_header *)bucket_buf; 4412 entries = (char *)xh->xh_entries; 4413 xh_free_start = le16_to_cpu(xh->xh_free_start); 4414 4415 trace_ocfs2_defrag_xattr_bucket( 4416 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4417 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4418 4419 /* 4420 * sort all the entries by their offset. 4421 * the largest will be the first, so that we can 4422 * move them to the end one by one. 4423 */ 4424 sort(entries, le16_to_cpu(xh->xh_count), 4425 sizeof(struct ocfs2_xattr_entry), 4426 cmp_xe_offset, swap_xe); 4427 4428 /* Move all name/values to the end of the bucket. */ 4429 xe = xh->xh_entries; 4430 end = OCFS2_XATTR_BUCKET_SIZE; 4431 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4432 offset = le16_to_cpu(xe->xe_name_offset); 4433 len = namevalue_size_xe(xe); 4434 4435 /* 4436 * We must make sure that the name/value pair 4437 * exist in the same block. So adjust end to 4438 * the previous block end if needed. 
4439 */ 4440 if (((end - len) / blocksize != 4441 (end - 1) / blocksize)) 4442 end = end - end % blocksize; 4443 4444 if (end > offset + len) { 4445 memmove(bucket_buf + end - len, 4446 bucket_buf + offset, len); 4447 xe->xe_name_offset = cpu_to_le16(end - len); 4448 } 4449 4450 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4451 "bucket %llu\n", (unsigned long long)blkno); 4452 4453 end -= len; 4454 } 4455 4456 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4457 "bucket %llu\n", (unsigned long long)blkno); 4458 4459 if (xh_free_start == end) 4460 goto out; 4461 4462 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4463 xh->xh_free_start = cpu_to_le16(end); 4464 4465 /* sort the entries by their name_hash. */ 4466 sort(entries, le16_to_cpu(xh->xh_count), 4467 sizeof(struct ocfs2_xattr_entry), 4468 cmp_xe, swap_xe); 4469 4470 buf = bucket_buf; 4471 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4472 memcpy(bucket_block(bucket, i), buf, blocksize); 4473 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4474 4475 out: 4476 kfree(bucket_buf); 4477 return ret; 4478 } 4479 4480 /* 4481 * prev_blkno points to the start of an existing extent. new_blkno 4482 * points to a newly allocated extent. Because we know each of our 4483 * clusters contains more than bucket, we can easily split one cluster 4484 * at a bucket boundary. So we take the last cluster of the existing 4485 * extent and split it down the middle. We move the last half of the 4486 * buckets in the last cluster of the existing extent over to the new 4487 * extent. 4488 * 4489 * first_bh is the buffer at prev_blkno so we can update the existing 4490 * extent's bucket count. header_bh is the bucket were we were hoping 4491 * to insert our xattr. If the bucket move places the target in the new 4492 * extent, we'll update first_bh and header_bh after modifying the old 4493 * extent. 
4494 * 4495 * first_hash will be set as the 1st xe's name_hash in the new extent. 4496 */ 4497 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 4498 handle_t *handle, 4499 struct ocfs2_xattr_bucket *first, 4500 struct ocfs2_xattr_bucket *target, 4501 u64 new_blkno, 4502 u32 num_clusters, 4503 u32 *first_hash) 4504 { 4505 int ret; 4506 struct super_block *sb = inode->i_sb; 4507 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); 4508 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); 4509 int to_move = num_buckets / 2; 4510 u64 src_blkno; 4511 u64 last_cluster_blkno = bucket_blkno(first) + 4512 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); 4513 4514 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4515 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4516 4517 trace_ocfs2_mv_xattr_bucket_cross_cluster( 4518 (unsigned long long)last_cluster_blkno, 4519 (unsigned long long)new_blkno); 4520 4521 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4522 last_cluster_blkno, new_blkno, 4523 to_move, first_hash); 4524 if (ret) { 4525 mlog_errno(ret); 4526 goto out; 4527 } 4528 4529 /* This is the first bucket that got moved */ 4530 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); 4531 4532 /* 4533 * If the target bucket was part of the moved buckets, we need to 4534 * update first and target. 4535 */ 4536 if (bucket_blkno(target) >= src_blkno) { 4537 /* Find the block for the new target bucket */ 4538 src_blkno = new_blkno + 4539 (bucket_blkno(target) - src_blkno); 4540 4541 ocfs2_xattr_bucket_relse(first); 4542 ocfs2_xattr_bucket_relse(target); 4543 4544 /* 4545 * These shouldn't fail - the buffers are in the 4546 * journal from ocfs2_cp_xattr_bucket(). 
4547 */ 4548 ret = ocfs2_read_xattr_bucket(first, new_blkno); 4549 if (ret) { 4550 mlog_errno(ret); 4551 goto out; 4552 } 4553 ret = ocfs2_read_xattr_bucket(target, src_blkno); 4554 if (ret) 4555 mlog_errno(ret); 4556 4557 } 4558 4559 out: 4560 return ret; 4561 } 4562 4563 /* 4564 * Find the suitable pos when we divide a bucket into 2. 4565 * We have to make sure the xattrs with the same hash value exist 4566 * in the same bucket. 4567 * 4568 * If this ocfs2_xattr_header covers more than one hash value, find a 4569 * place where the hash value changes. Try to find the most even split. 4570 * The most common case is that all entries have different hash values, 4571 * and the first check we make will find a place to split. 4572 */ 4573 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) 4574 { 4575 struct ocfs2_xattr_entry *entries = xh->xh_entries; 4576 int count = le16_to_cpu(xh->xh_count); 4577 int delta, middle = count / 2; 4578 4579 /* 4580 * We start at the middle. Each step gets farther away in both 4581 * directions. We therefore hit the change in hash value 4582 * nearest to the middle. Note that this loop does not execute for 4583 * count < 2. 4584 */ 4585 for (delta = 0; delta < middle; delta++) { 4586 /* Let's check delta earlier than middle */ 4587 if (cmp_xe(&entries[middle - delta - 1], 4588 &entries[middle - delta])) 4589 return middle - delta; 4590 4591 /* For even counts, don't walk off the end */ 4592 if ((middle + delta + 1) == count) 4593 continue; 4594 4595 /* Now try delta past middle */ 4596 if (cmp_xe(&entries[middle + delta], 4597 &entries[middle + delta + 1])) 4598 return middle + delta + 1; 4599 } 4600 4601 /* Every entry had the same hash */ 4602 return count; 4603 } 4604 4605 /* 4606 * Move some xattrs in old bucket(blk) to new bucket(new_blk). 4607 * first_hash will record the 1st hash of the new bucket. 4608 * 4609 * Normally half of the xattrs will be moved. 
But we have to make 4610 * sure that the xattrs with the same hash value are stored in the 4611 * same bucket. If all the xattrs in this bucket have the same hash 4612 * value, the new bucket will be initialized as an empty one and the 4613 * first_hash will be initialized as (hash_value+1). 4614 */ 4615 static int ocfs2_divide_xattr_bucket(struct inode *inode, 4616 handle_t *handle, 4617 u64 blk, 4618 u64 new_blk, 4619 u32 *first_hash, 4620 int new_bucket_head) 4621 { 4622 int ret, i; 4623 int count, start, len, name_value_len = 0, name_offset = 0; 4624 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4625 struct ocfs2_xattr_header *xh; 4626 struct ocfs2_xattr_entry *xe; 4627 int blocksize = inode->i_sb->s_blocksize; 4628 4629 trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk, 4630 (unsigned long long)new_blk); 4631 4632 s_bucket = ocfs2_xattr_bucket_new(inode); 4633 t_bucket = ocfs2_xattr_bucket_new(inode); 4634 if (!s_bucket || !t_bucket) { 4635 ret = -ENOMEM; 4636 mlog_errno(ret); 4637 goto out; 4638 } 4639 4640 ret = ocfs2_read_xattr_bucket(s_bucket, blk); 4641 if (ret) { 4642 mlog_errno(ret); 4643 goto out; 4644 } 4645 4646 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, 4647 OCFS2_JOURNAL_ACCESS_WRITE); 4648 if (ret) { 4649 mlog_errno(ret); 4650 goto out; 4651 } 4652 4653 /* 4654 * Even if !new_bucket_head, we're overwriting t_bucket. Thus, 4655 * there's no need to read it. 4656 */ 4657 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head); 4658 if (ret) { 4659 mlog_errno(ret); 4660 goto out; 4661 } 4662 4663 /* 4664 * Hey, if we're overwriting t_bucket, what difference does 4665 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the 4666 * same part of ocfs2_cp_xattr_bucket(). 4667 */ 4668 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4669 new_bucket_head ? 
4670 OCFS2_JOURNAL_ACCESS_CREATE : 4671 OCFS2_JOURNAL_ACCESS_WRITE); 4672 if (ret) { 4673 mlog_errno(ret); 4674 goto out; 4675 } 4676 4677 xh = bucket_xh(s_bucket); 4678 count = le16_to_cpu(xh->xh_count); 4679 start = ocfs2_xattr_find_divide_pos(xh); 4680 4681 if (start == count) { 4682 xe = &xh->xh_entries[start-1]; 4683 4684 /* 4685 * initialized a new empty bucket here. 4686 * The hash value is set as one larger than 4687 * that of the last entry in the previous bucket. 4688 */ 4689 for (i = 0; i < t_bucket->bu_blocks; i++) 4690 memset(bucket_block(t_bucket, i), 0, blocksize); 4691 4692 xh = bucket_xh(t_bucket); 4693 xh->xh_free_start = cpu_to_le16(blocksize); 4694 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 4695 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 4696 4697 goto set_num_buckets; 4698 } 4699 4700 /* copy the whole bucket to the new first. */ 4701 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4702 4703 /* update the new bucket. */ 4704 xh = bucket_xh(t_bucket); 4705 4706 /* 4707 * Calculate the total name/value len and xh_free_start for 4708 * the old bucket first. 4709 */ 4710 name_offset = OCFS2_XATTR_BUCKET_SIZE; 4711 name_value_len = 0; 4712 for (i = 0; i < start; i++) { 4713 xe = &xh->xh_entries[i]; 4714 name_value_len += namevalue_size_xe(xe); 4715 if (le16_to_cpu(xe->xe_name_offset) < name_offset) 4716 name_offset = le16_to_cpu(xe->xe_name_offset); 4717 } 4718 4719 /* 4720 * Now begin the modification to the new bucket. 4721 * 4722 * In the new bucket, We just move the xattr entry to the beginning 4723 * and don't touch the name/value. So there will be some holes in the 4724 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is 4725 * called. 
4726 */ 4727 xe = &xh->xh_entries[start]; 4728 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4729 trace_ocfs2_divide_xattr_bucket_move(len, 4730 (int)((char *)xe - (char *)xh), 4731 (int)((char *)xh->xh_entries - (char *)xh)); 4732 memmove((char *)xh->xh_entries, (char *)xe, len); 4733 xe = &xh->xh_entries[count - start]; 4734 len = sizeof(struct ocfs2_xattr_entry) * start; 4735 memset((char *)xe, 0, len); 4736 4737 le16_add_cpu(&xh->xh_count, -start); 4738 le16_add_cpu(&xh->xh_name_value_len, -name_value_len); 4739 4740 /* Calculate xh_free_start for the new bucket. */ 4741 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 4742 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 4743 xe = &xh->xh_entries[i]; 4744 if (le16_to_cpu(xe->xe_name_offset) < 4745 le16_to_cpu(xh->xh_free_start)) 4746 xh->xh_free_start = xe->xe_name_offset; 4747 } 4748 4749 set_num_buckets: 4750 /* set xh->xh_num_buckets for the new xh. */ 4751 if (new_bucket_head) 4752 xh->xh_num_buckets = cpu_to_le16(1); 4753 else 4754 xh->xh_num_buckets = 0; 4755 4756 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4757 4758 /* store the first_hash of the new bucket. */ 4759 if (first_hash) 4760 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4761 4762 /* 4763 * Now only update the 1st block of the old bucket. If we 4764 * just added a new empty bucket, there is no need to modify 4765 * it. 4766 */ 4767 if (start == count) 4768 goto out; 4769 4770 xh = bucket_xh(s_bucket); 4771 memset(&xh->xh_entries[start], 0, 4772 sizeof(struct ocfs2_xattr_entry) * (count - start)); 4773 xh->xh_count = cpu_to_le16(start); 4774 xh->xh_free_start = cpu_to_le16(name_offset); 4775 xh->xh_name_value_len = cpu_to_le16(name_value_len); 4776 4777 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); 4778 4779 out: 4780 ocfs2_xattr_bucket_free(s_bucket); 4781 ocfs2_xattr_bucket_free(t_bucket); 4782 4783 return ret; 4784 } 4785 4786 /* 4787 * Copy xattr from one bucket to another bucket. 
4788 * 4789 * The caller must make sure that the journal transaction 4790 * has enough space for journaling. 4791 */ 4792 static int ocfs2_cp_xattr_bucket(struct inode *inode, 4793 handle_t *handle, 4794 u64 s_blkno, 4795 u64 t_blkno, 4796 int t_is_new) 4797 { 4798 int ret; 4799 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4800 4801 BUG_ON(s_blkno == t_blkno); 4802 4803 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno, 4804 (unsigned long long)t_blkno, 4805 t_is_new); 4806 4807 s_bucket = ocfs2_xattr_bucket_new(inode); 4808 t_bucket = ocfs2_xattr_bucket_new(inode); 4809 if (!s_bucket || !t_bucket) { 4810 ret = -ENOMEM; 4811 mlog_errno(ret); 4812 goto out; 4813 } 4814 4815 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); 4816 if (ret) 4817 goto out; 4818 4819 /* 4820 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4821 * there's no need to read it. 4822 */ 4823 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new); 4824 if (ret) 4825 goto out; 4826 4827 /* 4828 * Hey, if we're overwriting t_bucket, what difference does 4829 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new 4830 * cluster to fill, we came here from 4831 * ocfs2_mv_xattr_buckets(), and it is really new - 4832 * ACCESS_CREATE is required. But we also might have moved data 4833 * out of t_bucket before extending back into it. 4834 * ocfs2_add_new_xattr_bucket() can do this - its call to 4835 * ocfs2_add_new_xattr_cluster() may have created a new extent 4836 * and copied out the end of the old extent. Then it re-extends 4837 * the old extent back to create space for new xattrs. That's 4838 * how we get here, and the bucket isn't really new. 4839 */ 4840 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4841 t_is_new ? 
4842 OCFS2_JOURNAL_ACCESS_CREATE : 4843 OCFS2_JOURNAL_ACCESS_WRITE); 4844 if (ret) 4845 goto out; 4846 4847 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4848 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4849 4850 out: 4851 ocfs2_xattr_bucket_free(t_bucket); 4852 ocfs2_xattr_bucket_free(s_bucket); 4853 4854 return ret; 4855 } 4856 4857 /* 4858 * src_blk points to the start of an existing extent. last_blk points to 4859 * last cluster in that extent. to_blk points to a newly allocated 4860 * extent. We copy the buckets from the cluster at last_blk to the new 4861 * extent. If start_bucket is non-zero, we skip that many buckets before 4862 * we start copying. The new extent's xh_num_buckets gets set to the 4863 * number of buckets we copied. The old extent's xh_num_buckets shrinks 4864 * by the same amount. 4865 */ 4866 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 4867 u64 src_blk, u64 last_blk, u64 to_blk, 4868 unsigned int start_bucket, 4869 u32 *first_hash) 4870 { 4871 int i, ret, credits; 4872 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4873 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4874 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4875 struct ocfs2_xattr_bucket *old_first, *new_first; 4876 4877 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk, 4878 (unsigned long long)to_blk); 4879 4880 BUG_ON(start_bucket >= num_buckets); 4881 if (start_bucket) { 4882 num_buckets -= start_bucket; 4883 last_blk += (start_bucket * blks_per_bucket); 4884 } 4885 4886 /* The first bucket of the original extent */ 4887 old_first = ocfs2_xattr_bucket_new(inode); 4888 /* The first bucket of the new extent */ 4889 new_first = ocfs2_xattr_bucket_new(inode); 4890 if (!old_first || !new_first) { 4891 ret = -ENOMEM; 4892 mlog_errno(ret); 4893 goto out; 4894 } 4895 4896 ret = ocfs2_read_xattr_bucket(old_first, src_blk); 4897 if (ret) { 4898 mlog_errno(ret); 4899 goto out; 4900 } 4901 4902 /* 4903 * We 
need to update the first bucket of the old extent and all 4904 * the buckets going to the new extent. 4905 */ 4906 credits = ((num_buckets + 1) * blks_per_bucket); 4907 ret = ocfs2_extend_trans(handle, credits); 4908 if (ret) { 4909 mlog_errno(ret); 4910 goto out; 4911 } 4912 4913 ret = ocfs2_xattr_bucket_journal_access(handle, old_first, 4914 OCFS2_JOURNAL_ACCESS_WRITE); 4915 if (ret) { 4916 mlog_errno(ret); 4917 goto out; 4918 } 4919 4920 for (i = 0; i < num_buckets; i++) { 4921 ret = ocfs2_cp_xattr_bucket(inode, handle, 4922 last_blk + (i * blks_per_bucket), 4923 to_blk + (i * blks_per_bucket), 4924 1); 4925 if (ret) { 4926 mlog_errno(ret); 4927 goto out; 4928 } 4929 } 4930 4931 /* 4932 * Get the new bucket ready before we dirty anything 4933 * (This actually shouldn't fail, because we already dirtied 4934 * it once in ocfs2_cp_xattr_bucket()). 4935 */ 4936 ret = ocfs2_read_xattr_bucket(new_first, to_blk); 4937 if (ret) { 4938 mlog_errno(ret); 4939 goto out; 4940 } 4941 ret = ocfs2_xattr_bucket_journal_access(handle, new_first, 4942 OCFS2_JOURNAL_ACCESS_WRITE); 4943 if (ret) { 4944 mlog_errno(ret); 4945 goto out; 4946 } 4947 4948 /* Now update the headers */ 4949 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); 4950 ocfs2_xattr_bucket_journal_dirty(handle, old_first); 4951 4952 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); 4953 ocfs2_xattr_bucket_journal_dirty(handle, new_first); 4954 4955 if (first_hash) 4956 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); 4957 4958 out: 4959 ocfs2_xattr_bucket_free(new_first); 4960 ocfs2_xattr_bucket_free(old_first); 4961 return ret; 4962 } 4963 4964 /* 4965 * Move some xattrs in this cluster to the new cluster. 4966 * This function should only be called when bucket size == cluster size. 4967 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 
4968 */ 4969 static int ocfs2_divide_xattr_cluster(struct inode *inode, 4970 handle_t *handle, 4971 u64 prev_blk, 4972 u64 new_blk, 4973 u32 *first_hash) 4974 { 4975 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4976 int ret, credits = 2 * blk_per_bucket; 4977 4978 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4979 4980 ret = ocfs2_extend_trans(handle, credits); 4981 if (ret) { 4982 mlog_errno(ret); 4983 return ret; 4984 } 4985 4986 /* Move half of the xattr in start_blk to the next bucket. */ 4987 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, 4988 new_blk, first_hash, 1); 4989 } 4990 4991 /* 4992 * Move some xattrs from the old cluster to the new one since they are not 4993 * contiguous in ocfs2 xattr tree. 4994 * 4995 * new_blk starts a new separate cluster, and we will move some xattrs from 4996 * prev_blk to it. v_start will be set as the first name hash value in this 4997 * new cluster so that it can be used as e_cpos during tree insertion and 4998 * don't collide with our original b-tree operations. first_bh and header_bh 4999 * will also be updated since they will be used in ocfs2_extend_xattr_bucket 5000 * to extend the insert bucket. 5001 * 5002 * The problem is how much xattr should we move to the new one and when should 5003 * we update first_bh and header_bh? 5004 * 1. If cluster size > bucket size, that means the previous cluster has more 5005 * than 1 bucket, so just move half nums of bucket into the new cluster and 5006 * update the first_bh and header_bh if the insert bucket has been moved 5007 * to the new cluster. 5008 * 2. If cluster_size == bucket_size: 5009 * a) If the previous extent rec has more than one cluster and the insert 5010 * place isn't in the last cluster, copy the entire last cluster to the 5011 * new one. This time, we don't need to upate the first_bh and header_bh 5012 * since they will not be moved into the new cluster. 
5013 * b) Otherwise, move the bottom half of the xattrs in the last cluster into 5014 * the new one. And we set the extend flag to zero if the insert place is 5015 * moved into the new allocated cluster since no extend is needed. 5016 */ 5017 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 5018 handle_t *handle, 5019 struct ocfs2_xattr_bucket *first, 5020 struct ocfs2_xattr_bucket *target, 5021 u64 new_blk, 5022 u32 prev_clusters, 5023 u32 *v_start, 5024 int *extend) 5025 { 5026 int ret; 5027 5028 trace_ocfs2_adjust_xattr_cross_cluster( 5029 (unsigned long long)bucket_blkno(first), 5030 (unsigned long long)new_blk, prev_clusters); 5031 5032 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5033 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5034 handle, 5035 first, target, 5036 new_blk, 5037 prev_clusters, 5038 v_start); 5039 if (ret) 5040 mlog_errno(ret); 5041 } else { 5042 /* The start of the last cluster in the first extent */ 5043 u64 last_blk = bucket_blkno(first) + 5044 ((prev_clusters - 1) * 5045 ocfs2_clusters_to_blocks(inode->i_sb, 1)); 5046 5047 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { 5048 ret = ocfs2_mv_xattr_buckets(inode, handle, 5049 bucket_blkno(first), 5050 last_blk, new_blk, 0, 5051 v_start); 5052 if (ret) 5053 mlog_errno(ret); 5054 } else { 5055 ret = ocfs2_divide_xattr_cluster(inode, handle, 5056 last_blk, new_blk, 5057 v_start); 5058 if (ret) 5059 mlog_errno(ret); 5060 5061 if ((bucket_blkno(target) == last_blk) && extend) 5062 *extend = 0; 5063 } 5064 } 5065 5066 return ret; 5067 } 5068 5069 /* 5070 * Add a new cluster for xattr storage. 5071 * 5072 * If the new cluster is contiguous with the previous one, it will be 5073 * appended to the same extent record, and num_clusters will be updated. 5074 * If not, we will insert a new extent for it and move some xattrs in 5075 * the last cluster into the new allocated one. 
5076 * We also need to limit the maximum size of a btree leaf, otherwise we'll 5077 * lose the benefits of hashing because we'll have to search large leaves. 5078 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, 5079 * if it's bigger). 5080 * 5081 * first_bh is the first block of the previous extent rec and header_bh 5082 * indicates the bucket we will insert the new xattrs. They will be updated 5083 * when the header_bh is moved into the new cluster. 5084 */ 5085 static int ocfs2_add_new_xattr_cluster(struct inode *inode, 5086 struct buffer_head *root_bh, 5087 struct ocfs2_xattr_bucket *first, 5088 struct ocfs2_xattr_bucket *target, 5089 u32 *num_clusters, 5090 u32 prev_cpos, 5091 int *extend, 5092 struct ocfs2_xattr_set_ctxt *ctxt) 5093 { 5094 int ret; 5095 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 5096 u32 prev_clusters = *num_clusters; 5097 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 5098 u64 block; 5099 handle_t *handle = ctxt->handle; 5100 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5101 struct ocfs2_extent_tree et; 5102 5103 trace_ocfs2_add_new_xattr_cluster_begin( 5104 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5105 (unsigned long long)bucket_blkno(first), 5106 prev_cpos, prev_clusters); 5107 5108 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5109 5110 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5111 OCFS2_JOURNAL_ACCESS_WRITE); 5112 if (ret < 0) { 5113 mlog_errno(ret); 5114 goto leave; 5115 } 5116 5117 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1, 5118 clusters_to_add, &bit_off, &num_bits); 5119 if (ret < 0) { 5120 if (ret != -ENOSPC) 5121 mlog_errno(ret); 5122 goto leave; 5123 } 5124 5125 BUG_ON(num_bits > clusters_to_add); 5126 5127 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5128 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits); 5129 5130 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5131 
(prev_clusters + num_bits) << osb->s_clustersize_bits <= 5132 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 5133 /* 5134 * If this cluster is contiguous with the old one and 5135 * adding this new cluster, we don't surpass the limit of 5136 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be 5137 * initialized and used like other buckets in the previous 5138 * cluster. 5139 * So add it as a contiguous one. The caller will handle 5140 * its init process. 5141 */ 5142 v_start = prev_cpos + prev_clusters; 5143 *num_clusters = prev_clusters + num_bits; 5144 } else { 5145 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5146 handle, 5147 first, 5148 target, 5149 block, 5150 prev_clusters, 5151 &v_start, 5152 extend); 5153 if (ret) { 5154 mlog_errno(ret); 5155 goto leave; 5156 } 5157 } 5158 5159 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block, 5160 v_start, num_bits); 5161 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5162 num_bits, 0, ctxt->meta_ac); 5163 if (ret < 0) { 5164 mlog_errno(ret); 5165 goto leave; 5166 } 5167 5168 ocfs2_journal_dirty(handle, root_bh); 5169 5170 leave: 5171 return ret; 5172 } 5173 5174 /* 5175 * We are given an extent. 'first' is the bucket at the very front of 5176 * the extent. The extent has space for an additional bucket past 5177 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number 5178 * of the target bucket. We wish to shift every bucket past the target 5179 * down one, filling in that additional space. When we get back to the 5180 * target, we split the target between itself and the now-empty bucket 5181 * at target+1 (aka, target_blkno + blks_per_bucket). 
5182 */ 5183 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5184 handle_t *handle, 5185 struct ocfs2_xattr_bucket *first, 5186 u64 target_blk, 5187 u32 num_clusters) 5188 { 5189 int ret, credits; 5190 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5191 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5192 u64 end_blk; 5193 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5194 5195 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk, 5196 (unsigned long long)bucket_blkno(first), 5197 num_clusters, new_bucket); 5198 5199 /* The extent must have room for an additional bucket */ 5200 BUG_ON(new_bucket >= 5201 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5202 5203 /* end_blk points to the last existing bucket */ 5204 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5205 5206 /* 5207 * end_blk is the start of the last existing bucket. 5208 * Thus, (end_blk - target_blk) covers the target bucket and 5209 * every bucket after it up to, but not including, the last 5210 * existing bucket. Then we add the last existing bucket, the 5211 * new bucket, and the first bucket (3 * blk_per_bucket). 5212 */ 5213 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5214 ret = ocfs2_extend_trans(handle, credits); 5215 if (ret) { 5216 mlog_errno(ret); 5217 goto out; 5218 } 5219 5220 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5221 OCFS2_JOURNAL_ACCESS_WRITE); 5222 if (ret) { 5223 mlog_errno(ret); 5224 goto out; 5225 } 5226 5227 while (end_blk != target_blk) { 5228 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5229 end_blk + blk_per_bucket, 0); 5230 if (ret) 5231 goto out; 5232 end_blk -= blk_per_bucket; 5233 } 5234 5235 /* Move half of the xattr in target_blkno to the next bucket. 
*/ 5236 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5237 target_blk + blk_per_bucket, NULL, 0); 5238 5239 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5240 ocfs2_xattr_bucket_journal_dirty(handle, first); 5241 5242 out: 5243 return ret; 5244 } 5245 5246 /* 5247 * Add new xattr bucket in an extent record and adjust the buckets 5248 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5249 * bucket we want to insert into. 5250 * 5251 * In the easy case, we will move all the buckets after target down by 5252 * one. Half of target's xattrs will be moved to the next bucket. 5253 * 5254 * If current cluster is full, we'll allocate a new one. This may not 5255 * be contiguous. The underlying calls will make sure that there is 5256 * space for the insert, shifting buckets around if necessary. 5257 * 'target' may be moved by those calls. 5258 */ 5259 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5260 struct buffer_head *xb_bh, 5261 struct ocfs2_xattr_bucket *target, 5262 struct ocfs2_xattr_set_ctxt *ctxt) 5263 { 5264 struct ocfs2_xattr_block *xb = 5265 (struct ocfs2_xattr_block *)xb_bh->b_data; 5266 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5267 struct ocfs2_extent_list *el = &xb_root->xt_list; 5268 u32 name_hash = 5269 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5270 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5271 int ret, num_buckets, extend = 1; 5272 u64 p_blkno; 5273 u32 e_cpos, num_clusters; 5274 /* The bucket at the front of the extent */ 5275 struct ocfs2_xattr_bucket *first; 5276 5277 trace_ocfs2_add_new_xattr_bucket( 5278 (unsigned long long)bucket_blkno(target)); 5279 5280 /* The first bucket of the original extent */ 5281 first = ocfs2_xattr_bucket_new(inode); 5282 if (!first) { 5283 ret = -ENOMEM; 5284 mlog_errno(ret); 5285 goto out; 5286 } 5287 5288 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5289 &num_clusters, el); 5290 if (ret) { 5291 mlog_errno(ret); 
5292 goto out; 5293 } 5294 5295 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5296 if (ret) { 5297 mlog_errno(ret); 5298 goto out; 5299 } 5300 5301 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5302 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5303 /* 5304 * This can move first+target if the target bucket moves 5305 * to the new extent. 5306 */ 5307 ret = ocfs2_add_new_xattr_cluster(inode, 5308 xb_bh, 5309 first, 5310 target, 5311 &num_clusters, 5312 e_cpos, 5313 &extend, 5314 ctxt); 5315 if (ret) { 5316 mlog_errno(ret); 5317 goto out; 5318 } 5319 } 5320 5321 if (extend) { 5322 ret = ocfs2_extend_xattr_bucket(inode, 5323 ctxt->handle, 5324 first, 5325 bucket_blkno(target), 5326 num_clusters); 5327 if (ret) 5328 mlog_errno(ret); 5329 } 5330 5331 out: 5332 ocfs2_xattr_bucket_free(first); 5333 5334 return ret; 5335 } 5336 5337 /* 5338 * Truncate the specified xe_off entry in xattr bucket. 5339 * bucket is indicated by header_bh and len is the new length. 5340 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5341 * 5342 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 5343 */ 5344 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5345 struct ocfs2_xattr_bucket *bucket, 5346 int xe_off, 5347 int len, 5348 struct ocfs2_xattr_set_ctxt *ctxt) 5349 { 5350 int ret, offset; 5351 u64 value_blk; 5352 struct ocfs2_xattr_entry *xe; 5353 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5354 size_t blocksize = inode->i_sb->s_blocksize; 5355 struct ocfs2_xattr_value_buf vb = { 5356 .vb_access = ocfs2_journal_access, 5357 }; 5358 5359 xe = &xh->xh_entries[xe_off]; 5360 5361 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5362 5363 offset = le16_to_cpu(xe->xe_name_offset) + 5364 OCFS2_XATTR_SIZE(xe->xe_name_len); 5365 5366 value_blk = offset / blocksize; 5367 5368 /* We don't allow ocfs2_xattr_value to be stored in different block. 
*/ 5369 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5370 5371 vb.vb_bh = bucket->bu_bhs[value_blk]; 5372 BUG_ON(!vb.vb_bh); 5373 5374 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5375 (vb.vb_bh->b_data + offset % blocksize); 5376 5377 /* 5378 * From here on out we have to dirty the bucket. The generic 5379 * value calls only modify one of the bucket's bhs, but we need 5380 * to send the bucket at once. So if they error, they *could* have 5381 * modified something. We have to assume they did, and dirty 5382 * the whole bucket. This leaves us in a consistent state. 5383 */ 5384 trace_ocfs2_xattr_bucket_value_truncate( 5385 (unsigned long long)bucket_blkno(bucket), xe_off, len); 5386 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5387 if (ret) { 5388 mlog_errno(ret); 5389 goto out; 5390 } 5391 5392 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5393 OCFS2_JOURNAL_ACCESS_WRITE); 5394 if (ret) { 5395 mlog_errno(ret); 5396 goto out; 5397 } 5398 5399 xe->xe_value_size = cpu_to_le64(len); 5400 5401 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5402 5403 out: 5404 return ret; 5405 } 5406 5407 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5408 struct buffer_head *root_bh, 5409 u64 blkno, 5410 u32 cpos, 5411 u32 len, 5412 void *para) 5413 { 5414 int ret; 5415 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5416 struct inode *tl_inode = osb->osb_tl_inode; 5417 handle_t *handle; 5418 struct ocfs2_xattr_block *xb = 5419 (struct ocfs2_xattr_block *)root_bh->b_data; 5420 struct ocfs2_alloc_context *meta_ac = NULL; 5421 struct ocfs2_cached_dealloc_ctxt dealloc; 5422 struct ocfs2_extent_tree et; 5423 5424 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5425 ocfs2_delete_xattr_in_bucket, para); 5426 if (ret) { 5427 mlog_errno(ret); 5428 return ret; 5429 } 5430 5431 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5432 5433 ocfs2_init_dealloc_ctxt(&dealloc); 5434 5435 
trace_ocfs2_rm_xattr_cluster( 5436 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5437 (unsigned long long)blkno, cpos, len); 5438 5439 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5440 len); 5441 5442 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5443 if (ret) { 5444 mlog_errno(ret); 5445 return ret; 5446 } 5447 5448 inode_lock(tl_inode); 5449 5450 if (ocfs2_truncate_log_needs_flush(osb)) { 5451 ret = __ocfs2_flush_truncate_log(osb); 5452 if (ret < 0) { 5453 mlog_errno(ret); 5454 goto out; 5455 } 5456 } 5457 5458 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5459 if (IS_ERR(handle)) { 5460 ret = -ENOMEM; 5461 mlog_errno(ret); 5462 goto out; 5463 } 5464 5465 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5466 OCFS2_JOURNAL_ACCESS_WRITE); 5467 if (ret) { 5468 mlog_errno(ret); 5469 goto out_commit; 5470 } 5471 5472 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5473 &dealloc); 5474 if (ret) { 5475 mlog_errno(ret); 5476 goto out_commit; 5477 } 5478 5479 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5480 ocfs2_journal_dirty(handle, root_bh); 5481 5482 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5483 if (ret) 5484 mlog_errno(ret); 5485 ocfs2_update_inode_fsync_trans(handle, inode, 0); 5486 5487 out_commit: 5488 ocfs2_commit_trans(osb, handle); 5489 out: 5490 ocfs2_schedule_truncate_log_flush(osb, 1); 5491 5492 inode_unlock(tl_inode); 5493 5494 if (meta_ac) 5495 ocfs2_free_alloc_context(meta_ac); 5496 5497 ocfs2_run_deallocs(osb, &dealloc); 5498 5499 return ret; 5500 } 5501 5502 /* 5503 * check whether the xattr bucket is filled up with the same hash value. 5504 * If we want to insert the xattr with the same hash, return -ENOSPC. 5505 * If we want to insert a xattr with different hash value, go ahead 5506 * and ocfs2_divide_xattr_bucket will handle this. 
5507 */ 5508 static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 5509 struct ocfs2_xattr_bucket *bucket, 5510 const char *name) 5511 { 5512 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5513 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5514 5515 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5516 return 0; 5517 5518 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 5519 xh->xh_entries[0].xe_name_hash) { 5520 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5521 "hash = %u\n", 5522 (unsigned long long)bucket_blkno(bucket), 5523 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5524 return -ENOSPC; 5525 } 5526 5527 return 0; 5528 } 5529 5530 /* 5531 * Try to set the entry in the current bucket. If we fail, the caller 5532 * will handle getting us another bucket. 5533 */ 5534 static int ocfs2_xattr_set_entry_bucket(struct inode *inode, 5535 struct ocfs2_xattr_info *xi, 5536 struct ocfs2_xattr_search *xs, 5537 struct ocfs2_xattr_set_ctxt *ctxt) 5538 { 5539 int ret; 5540 struct ocfs2_xa_loc loc; 5541 5542 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name); 5543 5544 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5545 xs->not_found ? NULL : xs->here); 5546 ret = ocfs2_xa_set(&loc, xi, ctxt); 5547 if (!ret) { 5548 xs->here = loc.xl_entry; 5549 goto out; 5550 } 5551 if (ret != -ENOSPC) { 5552 mlog_errno(ret); 5553 goto out; 5554 } 5555 5556 /* Ok, we need space. Let's try defragmenting the bucket. 
*/ 5557 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5558 xs->bucket); 5559 if (ret) { 5560 mlog_errno(ret); 5561 goto out; 5562 } 5563 5564 ret = ocfs2_xa_set(&loc, xi, ctxt); 5565 if (!ret) { 5566 xs->here = loc.xl_entry; 5567 goto out; 5568 } 5569 if (ret != -ENOSPC) 5570 mlog_errno(ret); 5571 5572 5573 out: 5574 return ret; 5575 } 5576 5577 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5578 struct ocfs2_xattr_info *xi, 5579 struct ocfs2_xattr_search *xs, 5580 struct ocfs2_xattr_set_ctxt *ctxt) 5581 { 5582 int ret; 5583 5584 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name); 5585 5586 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5587 if (!ret) 5588 goto out; 5589 if (ret != -ENOSPC) { 5590 mlog_errno(ret); 5591 goto out; 5592 } 5593 5594 /* Ack, need more space. Let's try to get another bucket! */ 5595 5596 /* 5597 * We do not allow for overlapping ranges between buckets. And 5598 * the maximum number of collisions we will allow for then is 5599 * one bucket's worth, so check it here whether we need to 5600 * add a new bucket for the insert. 5601 */ 5602 ret = ocfs2_check_xattr_bucket_collision(inode, 5603 xs->bucket, 5604 xi->xi_name); 5605 if (ret) { 5606 mlog_errno(ret); 5607 goto out; 5608 } 5609 5610 ret = ocfs2_add_new_xattr_bucket(inode, 5611 xs->xattr_bh, 5612 xs->bucket, 5613 ctxt); 5614 if (ret) { 5615 mlog_errno(ret); 5616 goto out; 5617 } 5618 5619 /* 5620 * ocfs2_add_new_xattr_bucket() will have updated 5621 * xs->bucket if it moved, but it will not have updated 5622 * any of the other search fields. Thus, we drop it and 5623 * re-search. Everything should be cached, so it'll be 5624 * quick. 
5625 */ 5626 ocfs2_xattr_bucket_relse(xs->bucket); 5627 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5628 xi->xi_name_index, 5629 xi->xi_name, xs); 5630 if (ret && ret != -ENODATA) 5631 goto out; 5632 xs->not_found = ret; 5633 5634 /* Ok, we have a new bucket, let's try again */ 5635 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5636 if (ret && (ret != -ENOSPC)) 5637 mlog_errno(ret); 5638 5639 out: 5640 return ret; 5641 } 5642 5643 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5644 struct ocfs2_xattr_bucket *bucket, 5645 void *para) 5646 { 5647 int ret = 0, ref_credits; 5648 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5649 u16 i; 5650 struct ocfs2_xattr_entry *xe; 5651 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5652 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5653 int credits = ocfs2_remove_extent_credits(osb->sb) + 5654 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5655 struct ocfs2_xattr_value_root *xv; 5656 struct ocfs2_rm_xattr_bucket_para *args = 5657 (struct ocfs2_rm_xattr_bucket_para *)para; 5658 5659 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5660 5661 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5662 xe = &xh->xh_entries[i]; 5663 if (ocfs2_xattr_is_local(xe)) 5664 continue; 5665 5666 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5667 i, &xv, NULL); 5668 if (ret) { 5669 mlog_errno(ret); 5670 break; 5671 } 5672 5673 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5674 args->ref_ci, 5675 args->ref_root_bh, 5676 &ctxt.meta_ac, 5677 &ref_credits); 5678 5679 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5680 if (IS_ERR(ctxt.handle)) { 5681 ret = PTR_ERR(ctxt.handle); 5682 mlog_errno(ret); 5683 break; 5684 } 5685 5686 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5687 i, 0, &ctxt); 5688 5689 ocfs2_commit_trans(osb, ctxt.handle); 5690 if (ctxt.meta_ac) { 5691 ocfs2_free_alloc_context(ctxt.meta_ac); 5692 ctxt.meta_ac = NULL; 5693 } 5694 if (ret) { 5695 mlog_errno(ret); 
5696 break; 5697 } 5698 } 5699 5700 if (ctxt.meta_ac) 5701 ocfs2_free_alloc_context(ctxt.meta_ac); 5702 ocfs2_schedule_truncate_log_flush(osb, 1); 5703 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5704 return ret; 5705 } 5706 5707 /* 5708 * Whenever we modify a xattr value root in the bucket(e.g, CoW 5709 * or change the extent record flag), we need to recalculate 5710 * the metaecc for the whole bucket. So it is done here. 5711 * 5712 * Note: 5713 * We have to give the extra credits for the caller. 5714 */ 5715 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, 5716 handle_t *handle, 5717 void *para) 5718 { 5719 int ret; 5720 struct ocfs2_xattr_bucket *bucket = 5721 (struct ocfs2_xattr_bucket *)para; 5722 5723 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 5724 OCFS2_JOURNAL_ACCESS_WRITE); 5725 if (ret) { 5726 mlog_errno(ret); 5727 return ret; 5728 } 5729 5730 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 5731 5732 return 0; 5733 } 5734 5735 /* 5736 * Special action we need if the xattr value is refcounted. 5737 * 5738 * 1. If the xattr is refcounted, lock the tree. 5739 * 2. CoW the xattr if we are setting the new value and the value 5740 * will be stored outside. 5741 * 3. In other case, decrease_refcount will work for us, so just 5742 * lock the refcount tree, calculate the meta and credits is OK. 5743 * 5744 * We have to do CoW before ocfs2_init_xattr_set_ctxt since 5745 * currently CoW is a completed transaction, while this function 5746 * will also lock the allocators and let us deadlock. So we will 5747 * CoW the whole xattr value. 
5748 */ 5749 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5750 struct ocfs2_dinode *di, 5751 struct ocfs2_xattr_info *xi, 5752 struct ocfs2_xattr_search *xis, 5753 struct ocfs2_xattr_search *xbs, 5754 struct ocfs2_refcount_tree **ref_tree, 5755 int *meta_add, 5756 int *credits) 5757 { 5758 int ret = 0; 5759 struct ocfs2_xattr_block *xb; 5760 struct ocfs2_xattr_entry *xe; 5761 char *base; 5762 u32 p_cluster, num_clusters; 5763 unsigned int ext_flags; 5764 int name_offset, name_len; 5765 struct ocfs2_xattr_value_buf vb; 5766 struct ocfs2_xattr_bucket *bucket = NULL; 5767 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5768 struct ocfs2_post_refcount refcount; 5769 struct ocfs2_post_refcount *p = NULL; 5770 struct buffer_head *ref_root_bh = NULL; 5771 5772 if (!xis->not_found) { 5773 xe = xis->here; 5774 name_offset = le16_to_cpu(xe->xe_name_offset); 5775 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5776 base = xis->base; 5777 vb.vb_bh = xis->inode_bh; 5778 vb.vb_access = ocfs2_journal_access_di; 5779 } else { 5780 int i, block_off = 0; 5781 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5782 xe = xbs->here; 5783 name_offset = le16_to_cpu(xe->xe_name_offset); 5784 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5785 i = xbs->here - xbs->header->xh_entries; 5786 5787 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5788 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5789 bucket_xh(xbs->bucket), 5790 i, &block_off, 5791 &name_offset); 5792 if (ret) { 5793 mlog_errno(ret); 5794 goto out; 5795 } 5796 base = bucket_block(xbs->bucket, block_off); 5797 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5798 vb.vb_access = ocfs2_journal_access; 5799 5800 if (ocfs2_meta_ecc(osb)) { 5801 /*create parameters for ocfs2_post_refcount. 
*/ 5802 bucket = xbs->bucket; 5803 refcount.credits = bucket->bu_blocks; 5804 refcount.para = bucket; 5805 refcount.func = 5806 ocfs2_xattr_bucket_post_refcount; 5807 p = &refcount; 5808 } 5809 } else { 5810 base = xbs->base; 5811 vb.vb_bh = xbs->xattr_bh; 5812 vb.vb_access = ocfs2_journal_access_xb; 5813 } 5814 } 5815 5816 if (ocfs2_xattr_is_local(xe)) 5817 goto out; 5818 5819 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5820 (base + name_offset + name_len); 5821 5822 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5823 &num_clusters, &vb.vb_xv->xr_list, 5824 &ext_flags); 5825 if (ret) { 5826 mlog_errno(ret); 5827 goto out; 5828 } 5829 5830 /* 5831 * We just need to check the 1st extent record, since we always 5832 * CoW the whole xattr. So there shouldn't be a xattr with 5833 * some REFCOUNT extent recs after the 1st one. 5834 */ 5835 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5836 goto out; 5837 5838 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5839 1, ref_tree, &ref_root_bh); 5840 if (ret) { 5841 mlog_errno(ret); 5842 goto out; 5843 } 5844 5845 /* 5846 * If we are deleting the xattr or the new size will be stored inside, 5847 * cool, leave it there, the xattr truncate process will remove them 5848 * for us(it still needs the refcount tree lock and the meta, credits). 5849 * And the worse case is that every cluster truncate will split the 5850 * refcount tree, and make the original extent become 3. So we will need 5851 * 2 * cluster more extent recs at most. 
5852 */ 5853 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5854 5855 ret = ocfs2_refcounted_xattr_delete_need(inode, 5856 &(*ref_tree)->rf_ci, 5857 ref_root_bh, vb.vb_xv, 5858 meta_add, credits); 5859 if (ret) 5860 mlog_errno(ret); 5861 goto out; 5862 } 5863 5864 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5865 *ref_tree, ref_root_bh, 0, 5866 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5867 if (ret) 5868 mlog_errno(ret); 5869 5870 out: 5871 brelse(ref_root_bh); 5872 return ret; 5873 } 5874 5875 /* 5876 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5877 * The physical clusters will be added to refcount tree. 5878 */ 5879 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5880 struct ocfs2_xattr_value_root *xv, 5881 struct ocfs2_extent_tree *value_et, 5882 struct ocfs2_caching_info *ref_ci, 5883 struct buffer_head *ref_root_bh, 5884 struct ocfs2_cached_dealloc_ctxt *dealloc, 5885 struct ocfs2_post_refcount *refcount) 5886 { 5887 int ret = 0; 5888 u32 clusters = le32_to_cpu(xv->xr_clusters); 5889 u32 cpos, p_cluster, num_clusters; 5890 struct ocfs2_extent_list *el = &xv->xr_list; 5891 unsigned int ext_flags; 5892 5893 cpos = 0; 5894 while (cpos < clusters) { 5895 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5896 &num_clusters, el, &ext_flags); 5897 if (ret) { 5898 mlog_errno(ret); 5899 break; 5900 } 5901 5902 cpos += num_clusters; 5903 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5904 continue; 5905 5906 BUG_ON(!p_cluster); 5907 5908 ret = ocfs2_add_refcount_flag(inode, value_et, 5909 ref_ci, ref_root_bh, 5910 cpos - num_clusters, 5911 p_cluster, num_clusters, 5912 dealloc, refcount); 5913 if (ret) { 5914 mlog_errno(ret); 5915 break; 5916 } 5917 } 5918 5919 return ret; 5920 } 5921 5922 /* 5923 * Given a normal ocfs2_xattr_header, refcount all the entries which 5924 * have value stored outside. 5925 * Used for xattrs stored in inode and ocfs2_xattr_block. 
5926 */ 5927 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5928 struct ocfs2_xattr_value_buf *vb, 5929 struct ocfs2_xattr_header *header, 5930 struct ocfs2_caching_info *ref_ci, 5931 struct buffer_head *ref_root_bh, 5932 struct ocfs2_cached_dealloc_ctxt *dealloc) 5933 { 5934 5935 struct ocfs2_xattr_entry *xe; 5936 struct ocfs2_xattr_value_root *xv; 5937 struct ocfs2_extent_tree et; 5938 int i, ret = 0; 5939 5940 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5941 xe = &header->xh_entries[i]; 5942 5943 if (ocfs2_xattr_is_local(xe)) 5944 continue; 5945 5946 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5947 le16_to_cpu(xe->xe_name_offset) + 5948 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5949 5950 vb->vb_xv = xv; 5951 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5952 5953 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 5954 ref_ci, ref_root_bh, 5955 dealloc, NULL); 5956 if (ret) { 5957 mlog_errno(ret); 5958 break; 5959 } 5960 } 5961 5962 return ret; 5963 } 5964 5965 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 5966 struct buffer_head *fe_bh, 5967 struct ocfs2_caching_info *ref_ci, 5968 struct buffer_head *ref_root_bh, 5969 struct ocfs2_cached_dealloc_ctxt *dealloc) 5970 { 5971 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 5972 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) 5973 (fe_bh->b_data + inode->i_sb->s_blocksize - 5974 le16_to_cpu(di->i_xattr_inline_size)); 5975 struct ocfs2_xattr_value_buf vb = { 5976 .vb_bh = fe_bh, 5977 .vb_access = ocfs2_journal_access_di, 5978 }; 5979 5980 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 5981 ref_ci, ref_root_bh, dealloc); 5982 } 5983 5984 struct ocfs2_xattr_tree_value_refcount_para { 5985 struct ocfs2_caching_info *ref_ci; 5986 struct buffer_head *ref_root_bh; 5987 struct ocfs2_cached_dealloc_ctxt *dealloc; 5988 }; 5989 5990 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 
5991 struct ocfs2_xattr_bucket *bucket, 5992 int offset, 5993 struct ocfs2_xattr_value_root **xv, 5994 struct buffer_head **bh) 5995 { 5996 int ret, block_off, name_offset; 5997 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5998 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 5999 void *base; 6000 6001 ret = ocfs2_xattr_bucket_get_name_value(sb, 6002 bucket_xh(bucket), 6003 offset, 6004 &block_off, 6005 &name_offset); 6006 if (ret) { 6007 mlog_errno(ret); 6008 goto out; 6009 } 6010 6011 base = bucket_block(bucket, block_off); 6012 6013 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6014 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6015 6016 if (bh) 6017 *bh = bucket->bu_bhs[block_off]; 6018 out: 6019 return ret; 6020 } 6021 6022 /* 6023 * For a given xattr bucket, refcount all the entries which 6024 * have value stored outside. 6025 */ 6026 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6027 struct ocfs2_xattr_bucket *bucket, 6028 void *para) 6029 { 6030 int i, ret = 0; 6031 struct ocfs2_extent_tree et; 6032 struct ocfs2_xattr_tree_value_refcount_para *ref = 6033 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6034 struct ocfs2_xattr_header *xh = 6035 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6036 struct ocfs2_xattr_entry *xe; 6037 struct ocfs2_xattr_value_buf vb = { 6038 .vb_access = ocfs2_journal_access, 6039 }; 6040 struct ocfs2_post_refcount refcount = { 6041 .credits = bucket->bu_blocks, 6042 .para = bucket, 6043 .func = ocfs2_xattr_bucket_post_refcount, 6044 }; 6045 struct ocfs2_post_refcount *p = NULL; 6046 6047 /* We only need post_refcount if we support metaecc. 
*/ 6048 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6049 p = &refcount; 6050 6051 trace_ocfs2_xattr_bucket_value_refcount( 6052 (unsigned long long)bucket_blkno(bucket), 6053 le16_to_cpu(xh->xh_count)); 6054 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6055 xe = &xh->xh_entries[i]; 6056 6057 if (ocfs2_xattr_is_local(xe)) 6058 continue; 6059 6060 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6061 &vb.vb_xv, &vb.vb_bh); 6062 if (ret) { 6063 mlog_errno(ret); 6064 break; 6065 } 6066 6067 ocfs2_init_xattr_value_extent_tree(&et, 6068 INODE_CACHE(inode), &vb); 6069 6070 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6071 &et, ref->ref_ci, 6072 ref->ref_root_bh, 6073 ref->dealloc, p); 6074 if (ret) { 6075 mlog_errno(ret); 6076 break; 6077 } 6078 } 6079 6080 return ret; 6081 6082 } 6083 6084 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6085 struct buffer_head *root_bh, 6086 u64 blkno, u32 cpos, u32 len, void *para) 6087 { 6088 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6089 ocfs2_xattr_bucket_value_refcount, 6090 para); 6091 } 6092 6093 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6094 struct buffer_head *blk_bh, 6095 struct ocfs2_caching_info *ref_ci, 6096 struct buffer_head *ref_root_bh, 6097 struct ocfs2_cached_dealloc_ctxt *dealloc) 6098 { 6099 int ret = 0; 6100 struct ocfs2_xattr_block *xb = 6101 (struct ocfs2_xattr_block *)blk_bh->b_data; 6102 6103 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6104 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6105 struct ocfs2_xattr_value_buf vb = { 6106 .vb_bh = blk_bh, 6107 .vb_access = ocfs2_journal_access_xb, 6108 }; 6109 6110 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6111 ref_ci, ref_root_bh, 6112 dealloc); 6113 } else { 6114 struct ocfs2_xattr_tree_value_refcount_para para = { 6115 .ref_ci = ref_ci, 6116 .ref_root_bh = ref_root_bh, 6117 .dealloc = dealloc, 6118 }; 6119 6120 ret = 
ocfs2_iterate_xattr_index_block(inode, blk_bh, 6121 ocfs2_refcount_xattr_tree_rec, 6122 ¶); 6123 } 6124 6125 return ret; 6126 } 6127 6128 int ocfs2_xattr_attach_refcount_tree(struct inode *inode, 6129 struct buffer_head *fe_bh, 6130 struct ocfs2_caching_info *ref_ci, 6131 struct buffer_head *ref_root_bh, 6132 struct ocfs2_cached_dealloc_ctxt *dealloc) 6133 { 6134 int ret = 0; 6135 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6136 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6137 struct buffer_head *blk_bh = NULL; 6138 6139 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 6140 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, 6141 ref_ci, ref_root_bh, 6142 dealloc); 6143 if (ret) { 6144 mlog_errno(ret); 6145 goto out; 6146 } 6147 } 6148 6149 if (!di->i_xattr_loc) 6150 goto out; 6151 6152 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 6153 &blk_bh); 6154 if (ret < 0) { 6155 mlog_errno(ret); 6156 goto out; 6157 } 6158 6159 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, 6160 ref_root_bh, dealloc); 6161 if (ret) 6162 mlog_errno(ret); 6163 6164 brelse(blk_bh); 6165 out: 6166 6167 return ret; 6168 } 6169 6170 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); 6171 /* 6172 * Store the information we need in xattr reflink. 6173 * old_bh and new_bh are inode bh for the old and new inode. 6174 */ 6175 struct ocfs2_xattr_reflink { 6176 struct inode *old_inode; 6177 struct inode *new_inode; 6178 struct buffer_head *old_bh; 6179 struct buffer_head *new_bh; 6180 struct ocfs2_caching_info *ref_ci; 6181 struct buffer_head *ref_root_bh; 6182 struct ocfs2_cached_dealloc_ctxt *dealloc; 6183 should_xattr_reflinked *xattr_reflinked; 6184 }; 6185 6186 /* 6187 * Given a xattr header and xe offset, 6188 * return the proper xv and the corresponding bh. 6189 * xattr in inode, block and xattr tree have different implementaions. 
6190 */ 6191 typedef int (get_xattr_value_root)(struct super_block *sb, 6192 struct buffer_head *bh, 6193 struct ocfs2_xattr_header *xh, 6194 int offset, 6195 struct ocfs2_xattr_value_root **xv, 6196 struct buffer_head **ret_bh, 6197 void *para); 6198 6199 /* 6200 * Calculate all the xattr value root metadata stored in this xattr header and 6201 * credits we need if we create them from the scratch. 6202 * We use get_xattr_value_root so that all types of xattr container can use it. 6203 */ 6204 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, 6205 struct buffer_head *bh, 6206 struct ocfs2_xattr_header *xh, 6207 int *metas, int *credits, 6208 int *num_recs, 6209 get_xattr_value_root *func, 6210 void *para) 6211 { 6212 int i, ret = 0; 6213 struct ocfs2_xattr_value_root *xv; 6214 struct ocfs2_xattr_entry *xe; 6215 6216 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6217 xe = &xh->xh_entries[i]; 6218 if (ocfs2_xattr_is_local(xe)) 6219 continue; 6220 6221 ret = func(sb, bh, xh, i, &xv, NULL, para); 6222 if (ret) { 6223 mlog_errno(ret); 6224 break; 6225 } 6226 6227 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * 6228 le16_to_cpu(xv->xr_list.l_next_free_rec); 6229 6230 *credits += ocfs2_calc_extend_credits(sb, 6231 &def_xv.xv.xr_list); 6232 6233 /* 6234 * If the value is a tree with depth > 1, We don't go deep 6235 * to the extent block, so just calculate a maximum record num. 6236 */ 6237 if (!xv->xr_list.l_tree_depth) 6238 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); 6239 else 6240 *num_recs += ocfs2_clusters_for_bytes(sb, 6241 XATTR_SIZE_MAX); 6242 } 6243 6244 return ret; 6245 } 6246 6247 /* Used by xattr inode and block to return the right xv and buffer_head. 
*/ 6248 static int ocfs2_get_xattr_value_root(struct super_block *sb, 6249 struct buffer_head *bh, 6250 struct ocfs2_xattr_header *xh, 6251 int offset, 6252 struct ocfs2_xattr_value_root **xv, 6253 struct buffer_head **ret_bh, 6254 void *para) 6255 { 6256 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6257 6258 *xv = (struct ocfs2_xattr_value_root *)((void *)xh + 6259 le16_to_cpu(xe->xe_name_offset) + 6260 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6261 6262 if (ret_bh) 6263 *ret_bh = bh; 6264 6265 return 0; 6266 } 6267 6268 /* 6269 * Lock the meta_ac and caculate how much credits we need for reflink xattrs. 6270 * It is only used for inline xattr and xattr block. 6271 */ 6272 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, 6273 struct ocfs2_xattr_header *xh, 6274 struct buffer_head *ref_root_bh, 6275 int *credits, 6276 struct ocfs2_alloc_context **meta_ac) 6277 { 6278 int ret, meta_add = 0, num_recs = 0; 6279 struct ocfs2_refcount_block *rb = 6280 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 6281 6282 *credits = 0; 6283 6284 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh, 6285 &meta_add, credits, &num_recs, 6286 ocfs2_get_xattr_value_root, 6287 NULL); 6288 if (ret) { 6289 mlog_errno(ret); 6290 goto out; 6291 } 6292 6293 /* 6294 * We need to add/modify num_recs in refcount tree, so just calculate 6295 * an approximate number we need for refcount tree change. 6296 * Sometimes we need to split the tree, and after split, half recs 6297 * will be moved to the new block, and a new block can only provide 6298 * half number of recs. So we multiple new blocks by 2. 
6299 */ 6300 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6301 meta_add += num_recs; 6302 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6303 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6304 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6305 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6306 else 6307 *credits += 1; 6308 6309 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); 6310 if (ret) 6311 mlog_errno(ret); 6312 6313 out: 6314 return ret; 6315 } 6316 6317 /* 6318 * Given a xattr header, reflink all the xattrs in this container. 6319 * It can be used for inode, block and bucket. 6320 * 6321 * NOTE: 6322 * Before we call this function, the caller has memcpy the xattr in 6323 * old_xh to the new_xh. 6324 * 6325 * If args.xattr_reflinked is set, call it to decide whether the xe should 6326 * be reflinked or not. If not, remove it from the new xattr header. 6327 */ 6328 static int ocfs2_reflink_xattr_header(handle_t *handle, 6329 struct ocfs2_xattr_reflink *args, 6330 struct buffer_head *old_bh, 6331 struct ocfs2_xattr_header *xh, 6332 struct buffer_head *new_bh, 6333 struct ocfs2_xattr_header *new_xh, 6334 struct ocfs2_xattr_value_buf *vb, 6335 struct ocfs2_alloc_context *meta_ac, 6336 get_xattr_value_root *func, 6337 void *para) 6338 { 6339 int ret = 0, i, j; 6340 struct super_block *sb = args->old_inode->i_sb; 6341 struct buffer_head *value_bh; 6342 struct ocfs2_xattr_entry *xe, *last; 6343 struct ocfs2_xattr_value_root *xv, *new_xv; 6344 struct ocfs2_extent_tree data_et; 6345 u32 clusters, cpos, p_cluster, num_clusters; 6346 unsigned int ext_flags = 0; 6347 6348 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr, 6349 le16_to_cpu(xh->xh_count)); 6350 6351 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; 6352 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6353 xe = &xh->xh_entries[i]; 6354 6355 if (args->xattr_reflinked && 
!args->xattr_reflinked(xe)) { 6356 xe = &new_xh->xh_entries[j]; 6357 6358 le16_add_cpu(&new_xh->xh_count, -1); 6359 if (new_xh->xh_count) { 6360 memmove(xe, xe + 1, 6361 (void *)last - (void *)xe); 6362 memset(last, 0, 6363 sizeof(struct ocfs2_xattr_entry)); 6364 } 6365 6366 /* 6367 * We don't want j to increase in the next round since 6368 * it is already moved ahead. 6369 */ 6370 j--; 6371 continue; 6372 } 6373 6374 if (ocfs2_xattr_is_local(xe)) 6375 continue; 6376 6377 ret = func(sb, old_bh, xh, i, &xv, NULL, para); 6378 if (ret) { 6379 mlog_errno(ret); 6380 break; 6381 } 6382 6383 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); 6384 if (ret) { 6385 mlog_errno(ret); 6386 break; 6387 } 6388 6389 /* 6390 * For the xattr which has l_tree_depth = 0, all the extent 6391 * recs have already be copied to the new xh with the 6392 * propriate OCFS2_EXT_REFCOUNTED flag we just need to 6393 * increase the refount count int the refcount tree. 6394 * 6395 * For the xattr which has l_tree_depth > 0, we need 6396 * to initialize it to the empty default value root, 6397 * and then insert the extents one by one. 
6398 */ 6399 if (xv->xr_list.l_tree_depth) { 6400 memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE); 6401 vb->vb_xv = new_xv; 6402 vb->vb_bh = value_bh; 6403 ocfs2_init_xattr_value_extent_tree(&data_et, 6404 INODE_CACHE(args->new_inode), vb); 6405 } 6406 6407 clusters = le32_to_cpu(xv->xr_clusters); 6408 cpos = 0; 6409 while (cpos < clusters) { 6410 ret = ocfs2_xattr_get_clusters(args->old_inode, 6411 cpos, 6412 &p_cluster, 6413 &num_clusters, 6414 &xv->xr_list, 6415 &ext_flags); 6416 if (ret) { 6417 mlog_errno(ret); 6418 goto out; 6419 } 6420 6421 BUG_ON(!p_cluster); 6422 6423 if (xv->xr_list.l_tree_depth) { 6424 ret = ocfs2_insert_extent(handle, 6425 &data_et, cpos, 6426 ocfs2_clusters_to_blocks( 6427 args->old_inode->i_sb, 6428 p_cluster), 6429 num_clusters, ext_flags, 6430 meta_ac); 6431 if (ret) { 6432 mlog_errno(ret); 6433 goto out; 6434 } 6435 } 6436 6437 ret = ocfs2_increase_refcount(handle, args->ref_ci, 6438 args->ref_root_bh, 6439 p_cluster, num_clusters, 6440 meta_ac, args->dealloc); 6441 if (ret) { 6442 mlog_errno(ret); 6443 goto out; 6444 } 6445 6446 cpos += num_clusters; 6447 } 6448 } 6449 6450 out: 6451 return ret; 6452 } 6453 6454 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) 6455 { 6456 int ret = 0, credits = 0; 6457 handle_t *handle; 6458 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); 6459 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; 6460 int inline_size = le16_to_cpu(di->i_xattr_inline_size); 6461 int header_off = osb->sb->s_blocksize - inline_size; 6462 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *) 6463 (args->old_bh->b_data + header_off); 6464 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *) 6465 (args->new_bh->b_data + header_off); 6466 struct ocfs2_alloc_context *meta_ac = NULL; 6467 struct ocfs2_inode_info *new_oi; 6468 struct ocfs2_dinode *new_di; 6469 struct ocfs2_xattr_value_buf vb = { 6470 .vb_bh = args->new_bh, 6471 .vb_access = 
ocfs2_journal_access_di, 6472 }; 6473 6474 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6475 &credits, &meta_ac); 6476 if (ret) { 6477 mlog_errno(ret); 6478 goto out; 6479 } 6480 6481 handle = ocfs2_start_trans(osb, credits); 6482 if (IS_ERR(handle)) { 6483 ret = PTR_ERR(handle); 6484 mlog_errno(ret); 6485 goto out; 6486 } 6487 6488 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), 6489 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6490 if (ret) { 6491 mlog_errno(ret); 6492 goto out_commit; 6493 } 6494 6495 memcpy(args->new_bh->b_data + header_off, 6496 args->old_bh->b_data + header_off, inline_size); 6497 6498 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6499 new_di->i_xattr_inline_size = cpu_to_le16(inline_size); 6500 6501 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, 6502 args->new_bh, new_xh, &vb, meta_ac, 6503 ocfs2_get_xattr_value_root, NULL); 6504 if (ret) { 6505 mlog_errno(ret); 6506 goto out_commit; 6507 } 6508 6509 new_oi = OCFS2_I(args->new_inode); 6510 /* 6511 * Adjust extent record count to reserve space for extended attribute. 6512 * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). 
6513 */ 6514 if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && 6515 !(ocfs2_inode_is_fast_symlink(args->new_inode))) { 6516 struct ocfs2_extent_list *el = &new_di->id2.i_list; 6517 le16_add_cpu(&el->l_count, -(inline_size / 6518 sizeof(struct ocfs2_extent_rec))); 6519 } 6520 spin_lock(&new_oi->ip_lock); 6521 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; 6522 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6523 spin_unlock(&new_oi->ip_lock); 6524 6525 ocfs2_journal_dirty(handle, args->new_bh); 6526 6527 out_commit: 6528 ocfs2_commit_trans(osb, handle); 6529 6530 out: 6531 if (meta_ac) 6532 ocfs2_free_alloc_context(meta_ac); 6533 return ret; 6534 } 6535 6536 static int ocfs2_create_empty_xattr_block(struct inode *inode, 6537 struct buffer_head *fe_bh, 6538 struct buffer_head **ret_bh, 6539 int indexed) 6540 { 6541 int ret; 6542 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6543 struct ocfs2_xattr_set_ctxt ctxt; 6544 6545 memset(&ctxt, 0, sizeof(ctxt)); 6546 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); 6547 if (ret < 0) { 6548 mlog_errno(ret); 6549 return ret; 6550 } 6551 6552 ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); 6553 if (IS_ERR(ctxt.handle)) { 6554 ret = PTR_ERR(ctxt.handle); 6555 mlog_errno(ret); 6556 goto out; 6557 } 6558 6559 trace_ocfs2_create_empty_xattr_block( 6560 (unsigned long long)fe_bh->b_blocknr, indexed); 6561 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6562 ret_bh); 6563 if (ret) 6564 mlog_errno(ret); 6565 6566 ocfs2_commit_trans(osb, ctxt.handle); 6567 out: 6568 ocfs2_free_alloc_context(ctxt.meta_ac); 6569 return ret; 6570 } 6571 6572 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args, 6573 struct buffer_head *blk_bh, 6574 struct buffer_head *new_blk_bh) 6575 { 6576 int ret = 0, credits = 0; 6577 handle_t *handle; 6578 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode); 6579 struct ocfs2_dinode *new_di; 
6580 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb); 6581 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 6582 struct ocfs2_xattr_block *xb = 6583 (struct ocfs2_xattr_block *)blk_bh->b_data; 6584 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header; 6585 struct ocfs2_xattr_block *new_xb = 6586 (struct ocfs2_xattr_block *)new_blk_bh->b_data; 6587 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header; 6588 struct ocfs2_alloc_context *meta_ac; 6589 struct ocfs2_xattr_value_buf vb = { 6590 .vb_bh = new_blk_bh, 6591 .vb_access = ocfs2_journal_access_xb, 6592 }; 6593 6594 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6595 &credits, &meta_ac); 6596 if (ret) { 6597 mlog_errno(ret); 6598 return ret; 6599 } 6600 6601 /* One more credits in case we need to add xattr flags in new inode. */ 6602 handle = ocfs2_start_trans(osb, credits + 1); 6603 if (IS_ERR(handle)) { 6604 ret = PTR_ERR(handle); 6605 mlog_errno(ret); 6606 goto out; 6607 } 6608 6609 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6610 ret = ocfs2_journal_access_di(handle, 6611 INODE_CACHE(args->new_inode), 6612 args->new_bh, 6613 OCFS2_JOURNAL_ACCESS_WRITE); 6614 if (ret) { 6615 mlog_errno(ret); 6616 goto out_commit; 6617 } 6618 } 6619 6620 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode), 6621 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6622 if (ret) { 6623 mlog_errno(ret); 6624 goto out_commit; 6625 } 6626 6627 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off, 6628 osb->sb->s_blocksize - header_off); 6629 6630 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh, 6631 new_blk_bh, new_xh, &vb, meta_ac, 6632 ocfs2_get_xattr_value_root, NULL); 6633 if (ret) { 6634 mlog_errno(ret); 6635 goto out_commit; 6636 } 6637 6638 ocfs2_journal_dirty(handle, new_blk_bh); 6639 6640 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6641 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6642 
spin_lock(&new_oi->ip_lock); 6643 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 6644 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6645 spin_unlock(&new_oi->ip_lock); 6646 6647 ocfs2_journal_dirty(handle, args->new_bh); 6648 } 6649 6650 out_commit: 6651 ocfs2_commit_trans(osb, handle); 6652 6653 out: 6654 ocfs2_free_alloc_context(meta_ac); 6655 return ret; 6656 } 6657 6658 struct ocfs2_reflink_xattr_tree_args { 6659 struct ocfs2_xattr_reflink *reflink; 6660 struct buffer_head *old_blk_bh; 6661 struct buffer_head *new_blk_bh; 6662 struct ocfs2_xattr_bucket *old_bucket; 6663 struct ocfs2_xattr_bucket *new_bucket; 6664 }; 6665 6666 /* 6667 * NOTE: 6668 * We have to handle the case that both old bucket and new bucket 6669 * will call this function to get the right ret_bh. 6670 * So The caller must give us the right bh. 6671 */ 6672 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb, 6673 struct buffer_head *bh, 6674 struct ocfs2_xattr_header *xh, 6675 int offset, 6676 struct ocfs2_xattr_value_root **xv, 6677 struct buffer_head **ret_bh, 6678 void *para) 6679 { 6680 struct ocfs2_reflink_xattr_tree_args *args = 6681 (struct ocfs2_reflink_xattr_tree_args *)para; 6682 struct ocfs2_xattr_bucket *bucket; 6683 6684 if (bh == args->old_bucket->bu_bhs[0]) 6685 bucket = args->old_bucket; 6686 else 6687 bucket = args->new_bucket; 6688 6689 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6690 xv, ret_bh); 6691 } 6692 6693 struct ocfs2_value_tree_metas { 6694 int num_metas; 6695 int credits; 6696 int num_recs; 6697 }; 6698 6699 static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb, 6700 struct buffer_head *bh, 6701 struct ocfs2_xattr_header *xh, 6702 int offset, 6703 struct ocfs2_xattr_value_root **xv, 6704 struct buffer_head **ret_bh, 6705 void *para) 6706 { 6707 struct ocfs2_xattr_bucket *bucket = 6708 (struct ocfs2_xattr_bucket *)para; 6709 6710 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6711 xv, 
ret_bh); 6712 } 6713 6714 static int ocfs2_calc_value_tree_metas(struct inode *inode, 6715 struct ocfs2_xattr_bucket *bucket, 6716 void *para) 6717 { 6718 struct ocfs2_value_tree_metas *metas = 6719 (struct ocfs2_value_tree_metas *)para; 6720 struct ocfs2_xattr_header *xh = 6721 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6722 6723 /* Add the credits for this bucket first. */ 6724 metas->credits += bucket->bu_blocks; 6725 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0], 6726 xh, &metas->num_metas, 6727 &metas->credits, &metas->num_recs, 6728 ocfs2_value_tree_metas_in_bucket, 6729 bucket); 6730 } 6731 6732 /* 6733 * Given a xattr extent rec starting from blkno and having len clusters, 6734 * iterate all the buckets calculate how much metadata we need for reflinking 6735 * all the ocfs2_xattr_value_root and lock the allocators accordingly. 6736 */ 6737 static int ocfs2_lock_reflink_xattr_rec_allocators( 6738 struct ocfs2_reflink_xattr_tree_args *args, 6739 struct ocfs2_extent_tree *xt_et, 6740 u64 blkno, u32 len, int *credits, 6741 struct ocfs2_alloc_context **meta_ac, 6742 struct ocfs2_alloc_context **data_ac) 6743 { 6744 int ret, num_free_extents; 6745 struct ocfs2_value_tree_metas metas; 6746 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb); 6747 struct ocfs2_refcount_block *rb; 6748 6749 memset(&metas, 0, sizeof(metas)); 6750 6751 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len, 6752 ocfs2_calc_value_tree_metas, &metas); 6753 if (ret) { 6754 mlog_errno(ret); 6755 goto out; 6756 } 6757 6758 *credits = metas.credits; 6759 6760 /* 6761 * Calculate we need for refcount tree change. 6762 * 6763 * We need to add/modify num_recs in refcount tree, so just calculate 6764 * an approximate number we need for refcount tree change. 6765 * Sometimes we need to split the tree, and after split, half recs 6766 * will be moved to the new block, and a new block can only provide 6767 * half number of recs. 
So we multiple new blocks by 2. 6768 * In the end, we have to add credits for modifying the already 6769 * existed refcount block. 6770 */ 6771 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data; 6772 metas.num_recs = 6773 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) / 6774 ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6775 metas.num_metas += metas.num_recs; 6776 *credits += metas.num_recs + 6777 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6778 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6779 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6780 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6781 else 6782 *credits += 1; 6783 6784 /* count in the xattr tree change. */ 6785 num_free_extents = ocfs2_num_free_extents(xt_et); 6786 if (num_free_extents < 0) { 6787 ret = num_free_extents; 6788 mlog_errno(ret); 6789 goto out; 6790 } 6791 6792 if (num_free_extents < len) 6793 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); 6794 6795 *credits += ocfs2_calc_extend_credits(osb->sb, 6796 xt_et->et_root_el); 6797 6798 if (metas.num_metas) { 6799 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, 6800 meta_ac); 6801 if (ret) { 6802 mlog_errno(ret); 6803 goto out; 6804 } 6805 } 6806 6807 if (len) { 6808 ret = ocfs2_reserve_clusters(osb, len, data_ac); 6809 if (ret) 6810 mlog_errno(ret); 6811 } 6812 out: 6813 if (ret) { 6814 if (*meta_ac) { 6815 ocfs2_free_alloc_context(*meta_ac); 6816 *meta_ac = NULL; 6817 } 6818 } 6819 6820 return ret; 6821 } 6822 6823 static int ocfs2_reflink_xattr_bucket(handle_t *handle, 6824 u64 blkno, u64 new_blkno, u32 clusters, 6825 u32 *cpos, int num_buckets, 6826 struct ocfs2_alloc_context *meta_ac, 6827 struct ocfs2_alloc_context *data_ac, 6828 struct ocfs2_reflink_xattr_tree_args *args) 6829 { 6830 int i, j, ret = 0; 6831 struct super_block *sb = args->reflink->old_inode->i_sb; 6832 int bpb = args->old_bucket->bu_blocks; 6833 struct ocfs2_xattr_value_buf vb = { 6834 
.vb_access = ocfs2_journal_access, 6835 }; 6836 6837 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) { 6838 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6839 if (ret) { 6840 mlog_errno(ret); 6841 break; 6842 } 6843 6844 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1); 6845 if (ret) { 6846 mlog_errno(ret); 6847 break; 6848 } 6849 6850 ret = ocfs2_xattr_bucket_journal_access(handle, 6851 args->new_bucket, 6852 OCFS2_JOURNAL_ACCESS_CREATE); 6853 if (ret) { 6854 mlog_errno(ret); 6855 break; 6856 } 6857 6858 for (j = 0; j < bpb; j++) 6859 memcpy(bucket_block(args->new_bucket, j), 6860 bucket_block(args->old_bucket, j), 6861 sb->s_blocksize); 6862 6863 /* 6864 * Record the start cpos so that we can use it to initialize 6865 * our xattr tree we also set the xh_num_bucket for the new 6866 * bucket. 6867 */ 6868 if (i == 0) { 6869 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> 6870 xh_entries[0].xe_name_hash); 6871 bucket_xh(args->new_bucket)->xh_num_buckets = 6872 cpu_to_le16(num_buckets); 6873 } 6874 6875 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6876 6877 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6878 args->old_bucket->bu_bhs[0], 6879 bucket_xh(args->old_bucket), 6880 args->new_bucket->bu_bhs[0], 6881 bucket_xh(args->new_bucket), 6882 &vb, meta_ac, 6883 ocfs2_get_reflink_xattr_value_root, 6884 args); 6885 if (ret) { 6886 mlog_errno(ret); 6887 break; 6888 } 6889 6890 /* 6891 * Re-access and dirty the bucket to calculate metaecc. 6892 * Because we may extend the transaction in reflink_xattr_header 6893 * which will let the already accessed block gone. 
6894 */ 6895 ret = ocfs2_xattr_bucket_journal_access(handle, 6896 args->new_bucket, 6897 OCFS2_JOURNAL_ACCESS_WRITE); 6898 if (ret) { 6899 mlog_errno(ret); 6900 break; 6901 } 6902 6903 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6904 6905 ocfs2_xattr_bucket_relse(args->old_bucket); 6906 ocfs2_xattr_bucket_relse(args->new_bucket); 6907 } 6908 6909 ocfs2_xattr_bucket_relse(args->old_bucket); 6910 ocfs2_xattr_bucket_relse(args->new_bucket); 6911 return ret; 6912 } 6913 6914 static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6915 struct inode *inode, 6916 struct ocfs2_reflink_xattr_tree_args *args, 6917 struct ocfs2_extent_tree *et, 6918 struct ocfs2_alloc_context *meta_ac, 6919 struct ocfs2_alloc_context *data_ac, 6920 u64 blkno, u32 cpos, u32 len) 6921 { 6922 int ret, first_inserted = 0; 6923 u32 p_cluster, num_clusters, reflink_cpos = 0; 6924 u64 new_blkno; 6925 unsigned int num_buckets, reflink_buckets; 6926 unsigned int bpc = 6927 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 6928 6929 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6930 if (ret) { 6931 mlog_errno(ret); 6932 goto out; 6933 } 6934 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); 6935 ocfs2_xattr_bucket_relse(args->old_bucket); 6936 6937 while (len && num_buckets) { 6938 ret = ocfs2_claim_clusters(handle, data_ac, 6939 1, &p_cluster, &num_clusters); 6940 if (ret) { 6941 mlog_errno(ret); 6942 goto out; 6943 } 6944 6945 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 6946 reflink_buckets = min(num_buckets, bpc * num_clusters); 6947 6948 ret = ocfs2_reflink_xattr_bucket(handle, blkno, 6949 new_blkno, num_clusters, 6950 &reflink_cpos, reflink_buckets, 6951 meta_ac, data_ac, args); 6952 if (ret) { 6953 mlog_errno(ret); 6954 goto out; 6955 } 6956 6957 /* 6958 * For the 1st allocated cluster, we make it use the same cpos 6959 * so that the xattr tree looks the same as the original one 6960 * in the most case. 
6961 */ 6962 if (!first_inserted) { 6963 reflink_cpos = cpos; 6964 first_inserted = 1; 6965 } 6966 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 6967 num_clusters, 0, meta_ac); 6968 if (ret) 6969 mlog_errno(ret); 6970 6971 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno, 6972 num_clusters, reflink_cpos); 6973 6974 len -= num_clusters; 6975 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6976 num_buckets -= reflink_buckets; 6977 } 6978 out: 6979 return ret; 6980 } 6981 6982 /* 6983 * Create the same xattr extent record in the new inode's xattr tree. 6984 */ 6985 static int ocfs2_reflink_xattr_rec(struct inode *inode, 6986 struct buffer_head *root_bh, 6987 u64 blkno, 6988 u32 cpos, 6989 u32 len, 6990 void *para) 6991 { 6992 int ret, credits = 0; 6993 handle_t *handle; 6994 struct ocfs2_reflink_xattr_tree_args *args = 6995 (struct ocfs2_reflink_xattr_tree_args *)para; 6996 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6997 struct ocfs2_alloc_context *meta_ac = NULL; 6998 struct ocfs2_alloc_context *data_ac = NULL; 6999 struct ocfs2_extent_tree et; 7000 7001 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len); 7002 7003 ocfs2_init_xattr_tree_extent_tree(&et, 7004 INODE_CACHE(args->reflink->new_inode), 7005 args->new_blk_bh); 7006 7007 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 7008 len, &credits, 7009 &meta_ac, &data_ac); 7010 if (ret) { 7011 mlog_errno(ret); 7012 goto out; 7013 } 7014 7015 handle = ocfs2_start_trans(osb, credits); 7016 if (IS_ERR(handle)) { 7017 ret = PTR_ERR(handle); 7018 mlog_errno(ret); 7019 goto out; 7020 } 7021 7022 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7023 meta_ac, data_ac, 7024 blkno, cpos, len); 7025 if (ret) 7026 mlog_errno(ret); 7027 7028 ocfs2_commit_trans(osb, handle); 7029 7030 out: 7031 if (meta_ac) 7032 ocfs2_free_alloc_context(meta_ac); 7033 if (data_ac) 7034 ocfs2_free_alloc_context(data_ac); 7035 return ret; 7036 } 7037 7038 /* 
7039 * Create reflinked xattr buckets. 7040 * We will add bucket one by one, and refcount all the xattrs in the bucket 7041 * if they are stored outside. 7042 */ 7043 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7044 struct buffer_head *blk_bh, 7045 struct buffer_head *new_blk_bh) 7046 { 7047 int ret; 7048 struct ocfs2_reflink_xattr_tree_args para; 7049 7050 memset(¶, 0, sizeof(para)); 7051 para.reflink = args; 7052 para.old_blk_bh = blk_bh; 7053 para.new_blk_bh = new_blk_bh; 7054 7055 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7056 if (!para.old_bucket) { 7057 mlog_errno(-ENOMEM); 7058 return -ENOMEM; 7059 } 7060 7061 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7062 if (!para.new_bucket) { 7063 ret = -ENOMEM; 7064 mlog_errno(ret); 7065 goto out; 7066 } 7067 7068 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7069 ocfs2_reflink_xattr_rec, 7070 ¶); 7071 if (ret) 7072 mlog_errno(ret); 7073 7074 out: 7075 ocfs2_xattr_bucket_free(para.old_bucket); 7076 ocfs2_xattr_bucket_free(para.new_bucket); 7077 return ret; 7078 } 7079 7080 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7081 struct buffer_head *blk_bh) 7082 { 7083 int ret, indexed = 0; 7084 struct buffer_head *new_blk_bh = NULL; 7085 struct ocfs2_xattr_block *xb = 7086 (struct ocfs2_xattr_block *)blk_bh->b_data; 7087 7088 7089 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7090 indexed = 1; 7091 7092 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7093 &new_blk_bh, indexed); 7094 if (ret) { 7095 mlog_errno(ret); 7096 goto out; 7097 } 7098 7099 if (!indexed) 7100 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7101 else 7102 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7103 if (ret) 7104 mlog_errno(ret); 7105 7106 out: 7107 brelse(new_blk_bh); 7108 return ret; 7109 } 7110 7111 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7112 { 7113 int type = 
ocfs2_xattr_get_type(xe); 7114 7115 return type != OCFS2_XATTR_INDEX_SECURITY && 7116 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && 7117 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; 7118 } 7119 7120 int ocfs2_reflink_xattrs(struct inode *old_inode, 7121 struct buffer_head *old_bh, 7122 struct inode *new_inode, 7123 struct buffer_head *new_bh, 7124 bool preserve_security) 7125 { 7126 int ret; 7127 struct ocfs2_xattr_reflink args; 7128 struct ocfs2_inode_info *oi = OCFS2_I(old_inode); 7129 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; 7130 struct buffer_head *blk_bh = NULL; 7131 struct ocfs2_cached_dealloc_ctxt dealloc; 7132 struct ocfs2_refcount_tree *ref_tree; 7133 struct buffer_head *ref_root_bh = NULL; 7134 7135 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7136 le64_to_cpu(di->i_refcount_loc), 7137 1, &ref_tree, &ref_root_bh); 7138 if (ret) { 7139 mlog_errno(ret); 7140 goto out; 7141 } 7142 7143 ocfs2_init_dealloc_ctxt(&dealloc); 7144 7145 args.old_inode = old_inode; 7146 args.new_inode = new_inode; 7147 args.old_bh = old_bh; 7148 args.new_bh = new_bh; 7149 args.ref_ci = &ref_tree->rf_ci; 7150 args.ref_root_bh = ref_root_bh; 7151 args.dealloc = &dealloc; 7152 if (preserve_security) 7153 args.xattr_reflinked = NULL; 7154 else 7155 args.xattr_reflinked = ocfs2_reflink_xattr_no_security; 7156 7157 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 7158 ret = ocfs2_reflink_xattr_inline(&args); 7159 if (ret) { 7160 mlog_errno(ret); 7161 goto out_unlock; 7162 } 7163 } 7164 7165 if (!di->i_xattr_loc) 7166 goto out_unlock; 7167 7168 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), 7169 &blk_bh); 7170 if (ret < 0) { 7171 mlog_errno(ret); 7172 goto out_unlock; 7173 } 7174 7175 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); 7176 if (ret) 7177 mlog_errno(ret); 7178 7179 brelse(blk_bh); 7180 7181 out_unlock: 7182 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7183 ref_tree, 1); 7184 brelse(ref_root_bh); 7185 7186 
if (ocfs2_dealloc_has_cluster(&dealloc)) { 7187 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); 7188 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); 7189 } 7190 7191 out: 7192 return ret; 7193 } 7194 7195 /* 7196 * Initialize security and acl for a already created inode. 7197 * Used for reflink a non-preserve-security file. 7198 * 7199 * It uses common api like ocfs2_xattr_set, so the caller 7200 * must not hold any lock expect i_mutex. 7201 */ 7202 int ocfs2_init_security_and_acl(struct inode *dir, 7203 struct inode *inode, 7204 const struct qstr *qstr) 7205 { 7206 int ret = 0; 7207 struct buffer_head *dir_bh = NULL; 7208 7209 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7210 if (ret) { 7211 mlog_errno(ret); 7212 goto leave; 7213 } 7214 7215 ret = ocfs2_inode_lock(dir, &dir_bh, 0); 7216 if (ret) { 7217 mlog_errno(ret); 7218 goto leave; 7219 } 7220 ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); 7221 if (ret) 7222 mlog_errno(ret); 7223 7224 ocfs2_inode_unlock(dir, 0); 7225 brelse(dir_bh); 7226 leave: 7227 return ret; 7228 } 7229 7230 /* 7231 * 'security' attributes support 7232 */ 7233 static int ocfs2_xattr_security_get(const struct xattr_handler *handler, 7234 struct dentry *unused, struct inode *inode, 7235 const char *name, void *buffer, size_t size) 7236 { 7237 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, 7238 name, buffer, size); 7239 } 7240 7241 static int ocfs2_xattr_security_set(const struct xattr_handler *handler, 7242 struct dentry *unused, struct inode *inode, 7243 const char *name, const void *value, 7244 size_t size, int flags) 7245 { 7246 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7247 name, value, size, flags); 7248 } 7249 7250 static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 7251 void *fs_info) 7252 { 7253 const struct xattr *xattr; 7254 int err = 0; 7255 7256 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 7257 err = 
ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7258 xattr->name, xattr->value, 7259 xattr->value_len, XATTR_CREATE); 7260 if (err) 7261 break; 7262 } 7263 return err; 7264 } 7265 7266 int ocfs2_init_security_get(struct inode *inode, 7267 struct inode *dir, 7268 const struct qstr *qstr, 7269 struct ocfs2_security_xattr_info *si) 7270 { 7271 /* check whether ocfs2 support feature xattr */ 7272 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7273 return -EOPNOTSUPP; 7274 if (si) 7275 return security_old_inode_init_security(inode, dir, qstr, 7276 &si->name, &si->value, 7277 &si->value_len); 7278 7279 return security_inode_init_security(inode, dir, qstr, 7280 &ocfs2_initxattrs, NULL); 7281 } 7282 7283 int ocfs2_init_security_set(handle_t *handle, 7284 struct inode *inode, 7285 struct buffer_head *di_bh, 7286 struct ocfs2_security_xattr_info *si, 7287 struct ocfs2_alloc_context *xattr_ac, 7288 struct ocfs2_alloc_context *data_ac) 7289 { 7290 return ocfs2_xattr_set_handle(handle, inode, di_bh, 7291 OCFS2_XATTR_INDEX_SECURITY, 7292 si->name, si->value, si->value_len, 0, 7293 xattr_ac, data_ac); 7294 } 7295 7296 const struct xattr_handler ocfs2_xattr_security_handler = { 7297 .prefix = XATTR_SECURITY_PREFIX, 7298 .get = ocfs2_xattr_security_get, 7299 .set = ocfs2_xattr_security_set, 7300 }; 7301 7302 /* 7303 * 'trusted' attributes support 7304 */ 7305 static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, 7306 struct dentry *unused, struct inode *inode, 7307 const char *name, void *buffer, size_t size) 7308 { 7309 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, 7310 name, buffer, size); 7311 } 7312 7313 static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler, 7314 struct dentry *unused, struct inode *inode, 7315 const char *name, const void *value, 7316 size_t size, int flags) 7317 { 7318 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, 7319 name, value, size, flags); 7320 } 7321 7322 const struct xattr_handler 
ocfs2_xattr_trusted_handler = { 7323 .prefix = XATTR_TRUSTED_PREFIX, 7324 .get = ocfs2_xattr_trusted_get, 7325 .set = ocfs2_xattr_trusted_set, 7326 }; 7327 7328 /* 7329 * 'user' attributes support 7330 */ 7331 static int ocfs2_xattr_user_get(const struct xattr_handler *handler, 7332 struct dentry *unused, struct inode *inode, 7333 const char *name, void *buffer, size_t size) 7334 { 7335 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7336 7337 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7338 return -EOPNOTSUPP; 7339 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, 7340 buffer, size); 7341 } 7342 7343 static int ocfs2_xattr_user_set(const struct xattr_handler *handler, 7344 struct dentry *unused, struct inode *inode, 7345 const char *name, const void *value, 7346 size_t size, int flags) 7347 { 7348 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7349 7350 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7351 return -EOPNOTSUPP; 7352 7353 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, 7354 name, value, size, flags); 7355 } 7356 7357 const struct xattr_handler ocfs2_xattr_user_handler = { 7358 .prefix = XATTR_USER_PREFIX, 7359 .get = ocfs2_xattr_user_get, 7360 .set = ocfs2_xattr_user_set, 7361 }; 7362