1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * xattr.c 4 * 5 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 6 * 7 * CREDITS: 8 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 9 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 10 */ 11 12 #include <linux/capability.h> 13 #include <linux/fs.h> 14 #include <linux/types.h> 15 #include <linux/slab.h> 16 #include <linux/highmem.h> 17 #include <linux/pagemap.h> 18 #include <linux/uio.h> 19 #include <linux/sched.h> 20 #include <linux/splice.h> 21 #include <linux/mount.h> 22 #include <linux/writeback.h> 23 #include <linux/falloc.h> 24 #include <linux/sort.h> 25 #include <linux/init.h> 26 #include <linux/module.h> 27 #include <linux/string.h> 28 #include <linux/security.h> 29 30 #include <cluster/masklog.h> 31 32 #include "ocfs2.h" 33 #include "alloc.h" 34 #include "blockcheck.h" 35 #include "dlmglue.h" 36 #include "file.h" 37 #include "symlink.h" 38 #include "sysfile.h" 39 #include "inode.h" 40 #include "journal.h" 41 #include "ocfs2_fs.h" 42 #include "suballoc.h" 43 #include "uptodate.h" 44 #include "buffer_head_io.h" 45 #include "super.h" 46 #include "xattr.h" 47 #include "refcounttree.h" 48 #include "acl.h" 49 #include "ocfs2_trace.h" 50 51 struct ocfs2_xattr_def_value_root { 52 struct ocfs2_xattr_value_root xv; 53 struct ocfs2_extent_rec er; 54 }; 55 56 struct ocfs2_xattr_bucket { 57 /* The inode these xattrs are associated with */ 58 struct inode *bu_inode; 59 60 /* The actual buffers that make up the bucket */ 61 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 62 63 /* How many blocks make up one bucket for this filesystem */ 64 int bu_blocks; 65 }; 66 67 struct ocfs2_xattr_set_ctxt { 68 handle_t *handle; 69 struct ocfs2_alloc_context *meta_ac; 70 struct ocfs2_alloc_context *data_ac; 71 struct ocfs2_cached_dealloc_ctxt dealloc; 72 int set_abort; 73 }; 74 75 #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 76 #define OCFS2_XATTR_INLINE_SIZE 80 77 #define OCFS2_XATTR_HEADER_GAP 4 78 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 79 - sizeof(struct ocfs2_xattr_header) \ 80 - OCFS2_XATTR_HEADER_GAP) 81 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 82 - sizeof(struct ocfs2_xattr_block) \ 83 - sizeof(struct ocfs2_xattr_header) \ 84 - OCFS2_XATTR_HEADER_GAP) 85 86 static struct ocfs2_xattr_def_value_root def_xv = { 87 .xv.xr_list.l_count = cpu_to_le16(1), 88 }; 89 90 const struct xattr_handler *ocfs2_xattr_handlers[] = { 91 &ocfs2_xattr_user_handler, 92 &ocfs2_xattr_trusted_handler, 93 &ocfs2_xattr_security_handler, 94 NULL 95 }; 96 97 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 98 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 99 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] = &nop_posix_acl_access, 100 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &nop_posix_acl_default, 101 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 102 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 103 }; 104 105 struct ocfs2_xattr_info { 106 int xi_name_index; 107 const char *xi_name; 108 int xi_name_len; 109 const void *xi_value; 110 size_t xi_value_len; 111 }; 112 113 struct ocfs2_xattr_search { 114 struct buffer_head *inode_bh; 115 /* 116 * xattr_bh point to the block buffer head which has extended attribute 117 * when extended attribute in inode, xattr_bh is equal to inode_bh. 118 */ 119 struct buffer_head *xattr_bh; 120 struct ocfs2_xattr_header *header; 121 struct ocfs2_xattr_bucket *bucket; 122 void *base; 123 void *end; 124 struct ocfs2_xattr_entry *here; 125 int not_found; 126 }; 127 128 /* Operations on struct ocfs2_xa_entry */ 129 struct ocfs2_xa_loc; 130 struct ocfs2_xa_loc_operations { 131 /* 132 * Journal functions 133 */ 134 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 135 int type); 136 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 137 138 /* 139 * Return a pointer to the appropriate buffer in loc->xl_storage 140 * at the given offset from loc->xl_header. 141 */ 142 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 143 144 /* Can we reuse the existing entry for the new value? */ 145 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 146 struct ocfs2_xattr_info *xi); 147 148 /* How much space is needed for the new value? */ 149 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 150 struct ocfs2_xattr_info *xi); 151 152 /* 153 * Return the offset of the first name+value pair. This is 154 * the start of our downward-filling free space. 155 */ 156 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 157 158 /* 159 * Remove the name+value at this location. Do whatever is 160 * appropriate with the remaining name+value pairs. 161 */ 162 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 163 164 /* Fill xl_entry with a new entry */ 165 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 166 167 /* Add name+value storage to an entry */ 168 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 169 170 /* 171 * Initialize the value buf's access and bh fields for this entry. 172 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 173 */ 174 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 175 struct ocfs2_xattr_value_buf *vb); 176 }; 177 178 /* 179 * Describes an xattr entry location. This is a memory structure 180 * tracking the on-disk structure. 181 */ 182 struct ocfs2_xa_loc { 183 /* This xattr belongs to this inode */ 184 struct inode *xl_inode; 185 186 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 187 struct ocfs2_xattr_header *xl_header; 188 189 /* Bytes from xl_header to the end of the storage */ 190 int xl_size; 191 192 /* 193 * The ocfs2_xattr_entry this location describes. If this is 194 * NULL, this location describes the on-disk structure where it 195 * would have been. 196 */ 197 struct ocfs2_xattr_entry *xl_entry; 198 199 /* 200 * Internal housekeeping 201 */ 202 203 /* Buffer(s) containing this entry */ 204 void *xl_storage; 205 206 /* Operations on the storage backing this location */ 207 const struct ocfs2_xa_loc_operations *xl_ops; 208 }; 209 210 /* 211 * Convenience functions to calculate how much space is needed for a 212 * given name+value pair 213 */ 214 static int namevalue_size(int name_len, uint64_t value_len) 215 { 216 if (value_len > OCFS2_XATTR_INLINE_SIZE) 217 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 218 else 219 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 220 } 221 222 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 223 { 224 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 225 } 226 227 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 228 { 229 u64 value_len = le64_to_cpu(xe->xe_value_size); 230 231 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 232 ocfs2_xattr_is_local(xe)); 233 return namevalue_size(xe->xe_name_len, value_len); 234 } 235 236 237 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 238 struct ocfs2_xattr_header *xh, 239 int index, 240 int *block_off, 241 int *new_offset); 242 243 static int ocfs2_xattr_block_find(struct inode *inode, 244 int name_index, 245 const char *name, 246 struct ocfs2_xattr_search *xs); 247 static int ocfs2_xattr_index_block_find(struct inode *inode, 248 struct buffer_head *root_bh, 249 int name_index, 250 const char *name, 251 struct ocfs2_xattr_search *xs); 252 253 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 254 struct buffer_head *blk_bh, 255 char *buffer, 256 size_t buffer_size); 257 258 static int ocfs2_xattr_create_index_block(struct inode *inode, 259 struct ocfs2_xattr_search *xs, 260 struct ocfs2_xattr_set_ctxt *ctxt); 261 262 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 263 struct ocfs2_xattr_info *xi, 264 struct ocfs2_xattr_search *xs, 265 struct ocfs2_xattr_set_ctxt *ctxt); 266 267 typedef int (xattr_tree_rec_func)(struct inode *inode, 268 struct buffer_head *root_bh, 269 u64 blkno, u32 cpos, u32 len, void *para); 270 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 271 struct buffer_head *root_bh, 272 xattr_tree_rec_func *rec_func, 273 void *para); 274 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 275 struct ocfs2_xattr_bucket *bucket, 276 void *para); 277 static int ocfs2_rm_xattr_cluster(struct inode *inode, 278 struct buffer_head *root_bh, 279 u64 blkno, 280 u32 cpos, 281 u32 len, 282 void *para); 283 284 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 285 u64 src_blk, u64 last_blk, u64 to_blk, 286 unsigned int start_bucket, 287 u32 *first_hash); 288 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 289 struct ocfs2_dinode *di, 290 struct ocfs2_xattr_info *xi, 291 struct ocfs2_xattr_search *xis, 292 struct ocfs2_xattr_search *xbs, 293 struct ocfs2_refcount_tree **ref_tree, 294 int *meta_need, 295 int *credits); 296 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 297 struct ocfs2_xattr_bucket *bucket, 298 int offset, 299 struct ocfs2_xattr_value_root **xv, 300 struct buffer_head **bh); 301 302 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 303 { 304 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 305 } 306 307 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 308 { 309 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 310 } 311 312 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 313 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 314 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 315 316 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 317 { 318 struct ocfs2_xattr_bucket *bucket; 319 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 320 321 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 322 323 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); 324 if (bucket) { 325 bucket->bu_inode = inode; 326 bucket->bu_blocks = blks; 327 } 328 329 return bucket; 330 } 331 332 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 333 { 334 int i; 335 336 for (i = 0; i < bucket->bu_blocks; i++) { 337 brelse(bucket->bu_bhs[i]); 338 bucket->bu_bhs[i] = NULL; 339 } 340 } 341 342 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 343 { 344 if (bucket) { 345 ocfs2_xattr_bucket_relse(bucket); 346 bucket->bu_inode = NULL; 347 kfree(bucket); 348 } 349 } 350 351 /* 352 * A bucket that has never been written to disk doesn't need to be 353 * read. We just need the buffer_heads. Don't call this for 354 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 355 * them fully. 356 */ 357 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 358 u64 xb_blkno, int new) 359 { 360 int i, rc = 0; 361 362 for (i = 0; i < bucket->bu_blocks; i++) { 363 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 364 xb_blkno + i); 365 if (!bucket->bu_bhs[i]) { 366 rc = -ENOMEM; 367 mlog_errno(rc); 368 break; 369 } 370 371 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 372 bucket->bu_bhs[i])) { 373 if (new) 374 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 375 bucket->bu_bhs[i]); 376 else { 377 set_buffer_uptodate(bucket->bu_bhs[i]); 378 ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 379 bucket->bu_bhs[i]); 380 } 381 } 382 } 383 384 if (rc) 385 ocfs2_xattr_bucket_relse(bucket); 386 return rc; 387 } 388 389 /* Read the xattr bucket at xb_blkno */ 390 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 391 u64 xb_blkno) 392 { 393 int rc; 394 395 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 396 bucket->bu_blocks, bucket->bu_bhs, 0, 397 NULL); 398 if (!rc) { 399 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 400 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 401 bucket->bu_bhs, 402 bucket->bu_blocks, 403 &bucket_xh(bucket)->xh_check); 404 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 405 if (rc) 406 mlog_errno(rc); 407 } 408 409 if (rc) 410 ocfs2_xattr_bucket_relse(bucket); 411 return rc; 412 } 413 414 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 415 struct ocfs2_xattr_bucket *bucket, 416 int type) 417 { 418 int i, rc = 0; 419 420 for (i = 0; i < bucket->bu_blocks; i++) { 421 rc = ocfs2_journal_access(handle, 422 INODE_CACHE(bucket->bu_inode), 423 bucket->bu_bhs[i], type); 424 if (rc) { 425 mlog_errno(rc); 426 break; 427 } 428 } 429 430 return rc; 431 } 432 433 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 434 struct ocfs2_xattr_bucket *bucket) 435 { 436 int i; 437 438 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 439 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 440 bucket->bu_bhs, bucket->bu_blocks, 441 &bucket_xh(bucket)->xh_check); 442 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 443 444 for (i = 0; i < bucket->bu_blocks; i++) 445 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 446 } 447 448 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 449 struct ocfs2_xattr_bucket *src) 450 { 451 int i; 452 int blocksize = src->bu_inode->i_sb->s_blocksize; 453 454 BUG_ON(dest->bu_blocks != src->bu_blocks); 455 BUG_ON(dest->bu_inode != src->bu_inode); 456 457 for (i = 0; i < src->bu_blocks; i++) { 458 memcpy(bucket_block(dest, i), bucket_block(src, i), 459 blocksize); 460 } 461 } 462 463 static int ocfs2_validate_xattr_block(struct super_block *sb, 464 struct buffer_head *bh) 465 { 466 int rc; 467 struct ocfs2_xattr_block *xb = 468 (struct ocfs2_xattr_block *)bh->b_data; 469 470 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); 471 472 BUG_ON(!buffer_uptodate(bh)); 473 474 /* 475 * If the ecc fails, we return the error but otherwise 476 * leave the filesystem running. We know any error is 477 * local to this block. 478 */ 479 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 480 if (rc) 481 return rc; 482 483 /* 484 * Errors after here are fatal 485 */ 486 487 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 488 return ocfs2_error(sb, 489 "Extended attribute block #%llu has bad signature %.*s\n", 490 (unsigned long long)bh->b_blocknr, 7, 491 xb->xb_signature); 492 } 493 494 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 495 return ocfs2_error(sb, 496 "Extended attribute block #%llu has an invalid xb_blkno of %llu\n", 497 (unsigned long long)bh->b_blocknr, 498 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 499 } 500 501 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 502 return ocfs2_error(sb, 503 "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n", 504 (unsigned long long)bh->b_blocknr, 505 le32_to_cpu(xb->xb_fs_generation)); 506 } 507 508 return 0; 509 } 510 511 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 512 struct buffer_head **bh) 513 { 514 int rc; 515 struct buffer_head *tmp = *bh; 516 517 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 518 ocfs2_validate_xattr_block); 519 520 /* If ocfs2_read_block() got us a new bh, pass it up. */ 521 if (!rc && !*bh) 522 *bh = tmp; 523 524 return rc; 525 } 526 527 static inline const char *ocfs2_xattr_prefix(int name_index) 528 { 529 const struct xattr_handler *handler = NULL; 530 531 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 532 handler = ocfs2_xattr_handler_map[name_index]; 533 return handler ? xattr_prefix(handler) : NULL; 534 } 535 536 static u32 ocfs2_xattr_name_hash(struct inode *inode, 537 const char *name, 538 int name_len) 539 { 540 /* Get hash value of uuid from super block */ 541 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 542 int i; 543 544 /* hash extended attribute name */ 545 for (i = 0; i < name_len; i++) { 546 hash = (hash << OCFS2_HASH_SHIFT) ^ 547 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 548 *name++; 549 } 550 551 return hash; 552 } 553 554 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 555 { 556 return namevalue_size(name_len, value_len) + 557 sizeof(struct ocfs2_xattr_entry); 558 } 559 560 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 561 { 562 return namevalue_size_xi(xi) + 563 sizeof(struct ocfs2_xattr_entry); 564 } 565 566 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 567 { 568 return namevalue_size_xe(xe) + 569 sizeof(struct ocfs2_xattr_entry); 570 } 571 572 int ocfs2_calc_security_init(struct inode *dir, 573 struct ocfs2_security_xattr_info *si, 574 int *want_clusters, 575 int *xattr_credits, 576 struct ocfs2_alloc_context **xattr_ac) 577 { 578 int ret = 0; 579 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 580 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 581 si->value_len); 582 583 /* 584 * The max space of security xattr taken inline is 585 * 256(name) + 80(value) + 16(entry) = 352 bytes, 586 * So reserve one metadata block for it is ok. 587 */ 588 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 589 s_size > OCFS2_XATTR_FREE_IN_IBODY) { 590 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 591 if (ret) { 592 mlog_errno(ret); 593 return ret; 594 } 595 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 596 } 597 598 /* reserve clusters for xattr value which will be set in B tree*/ 599 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { 600 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 601 si->value_len); 602 603 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 604 new_clusters); 605 *want_clusters += new_clusters; 606 } 607 return ret; 608 } 609 610 int ocfs2_calc_xattr_init(struct inode *dir, 611 struct buffer_head *dir_bh, 612 umode_t mode, 613 struct ocfs2_security_xattr_info *si, 614 int *want_clusters, 615 int *xattr_credits, 616 int *want_meta) 617 { 618 int ret = 0; 619 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 620 int s_size = 0, a_size = 0, acl_len = 0, new_clusters; 621 622 if (si->enable) 623 s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 624 si->value_len); 625 626 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 627 down_read(&OCFS2_I(dir)->ip_xattr_sem); 628 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 629 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 630 "", NULL, 0); 631 up_read(&OCFS2_I(dir)->ip_xattr_sem); 632 if (acl_len > 0) { 633 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 634 if (S_ISDIR(mode)) 635 a_size <<= 1; 636 } else if (acl_len != 0 && acl_len != -ENODATA) { 637 ret = acl_len; 638 mlog_errno(ret); 639 return ret; 640 } 641 } 642 643 if (!(s_size + a_size)) 644 return ret; 645 646 /* 647 * The max space of security xattr taken inline is 648 * 256(name) + 80(value) + 16(entry) = 352 bytes, 649 * The max space of acl xattr taken inline is 650 * 80(value) + 16(entry) * 2(if directory) = 192 bytes, 651 * when blocksize = 512, may reserve one more cluser for 652 * xattr bucket, otherwise reserve one metadata block 653 * for them is ok. 654 * If this is a new directory with inline data, 655 * we choose to reserve the entire inline area for 656 * directory contents and force an external xattr block. 657 */ 658 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 659 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || 660 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 661 *want_meta = *want_meta + 1; 662 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 663 } 664 665 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && 666 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { 667 *want_clusters += 1; 668 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); 669 } 670 671 /* 672 * reserve credits and clusters for xattrs which has large value 673 * and have to be set outside 674 */ 675 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { 676 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 677 si->value_len); 678 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 679 new_clusters); 680 *want_clusters += new_clusters; 681 } 682 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && 683 acl_len > OCFS2_XATTR_INLINE_SIZE) { 684 /* for directory, it has DEFAULT and ACCESS two types of acls */ 685 new_clusters = (S_ISDIR(mode) ? 2 : 1) * 686 ocfs2_clusters_for_bytes(dir->i_sb, acl_len); 687 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 688 new_clusters); 689 *want_clusters += new_clusters; 690 } 691 692 return ret; 693 } 694 695 static int ocfs2_xattr_extend_allocation(struct inode *inode, 696 u32 clusters_to_add, 697 struct ocfs2_xattr_value_buf *vb, 698 struct ocfs2_xattr_set_ctxt *ctxt) 699 { 700 int status = 0, credits; 701 handle_t *handle = ctxt->handle; 702 enum ocfs2_alloc_restarted why; 703 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 704 struct ocfs2_extent_tree et; 705 706 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 707 708 while (clusters_to_add) { 709 trace_ocfs2_xattr_extend_allocation(clusters_to_add); 710 711 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 712 OCFS2_JOURNAL_ACCESS_WRITE); 713 if (status < 0) { 714 mlog_errno(status); 715 break; 716 } 717 718 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 719 status = ocfs2_add_clusters_in_btree(handle, 720 &et, 721 &logical_start, 722 clusters_to_add, 723 0, 724 ctxt->data_ac, 725 ctxt->meta_ac, 726 &why); 727 if ((status < 0) && (status != -EAGAIN)) { 728 if (status != -ENOSPC) 729 mlog_errno(status); 730 break; 731 } 732 733 ocfs2_journal_dirty(handle, vb->vb_bh); 734 735 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - 736 prev_clusters; 737 738 if (why != RESTART_NONE && clusters_to_add) { 739 /* 740 * We can only fail in case the alloc file doesn't give 741 * up enough clusters. 742 */ 743 BUG_ON(why == RESTART_META); 744 745 credits = ocfs2_calc_extend_credits(inode->i_sb, 746 &vb->vb_xv->xr_list); 747 status = ocfs2_extend_trans(handle, credits); 748 if (status < 0) { 749 status = -ENOMEM; 750 mlog_errno(status); 751 break; 752 } 753 } 754 } 755 756 return status; 757 } 758 759 static int __ocfs2_remove_xattr_range(struct inode *inode, 760 struct ocfs2_xattr_value_buf *vb, 761 u32 cpos, u32 phys_cpos, u32 len, 762 unsigned int ext_flags, 763 struct ocfs2_xattr_set_ctxt *ctxt) 764 { 765 int ret; 766 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 767 handle_t *handle = ctxt->handle; 768 struct ocfs2_extent_tree et; 769 770 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 771 772 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 773 OCFS2_JOURNAL_ACCESS_WRITE); 774 if (ret) { 775 mlog_errno(ret); 776 goto out; 777 } 778 779 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, 780 &ctxt->dealloc); 781 if (ret) { 782 mlog_errno(ret); 783 goto out; 784 } 785 786 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 787 ocfs2_journal_dirty(handle, vb->vb_bh); 788 789 if (ext_flags & OCFS2_EXT_REFCOUNTED) 790 ret = ocfs2_decrease_refcount(inode, handle, 791 ocfs2_blocks_to_clusters(inode->i_sb, 792 phys_blkno), 793 len, ctxt->meta_ac, &ctxt->dealloc, 1); 794 else 795 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, 796 phys_blkno, len); 797 if (ret) 798 mlog_errno(ret); 799 800 out: 801 return ret; 802 } 803 804 static int ocfs2_xattr_shrink_size(struct inode *inode, 805 u32 old_clusters, 806 u32 new_clusters, 807 struct ocfs2_xattr_value_buf *vb, 808 struct ocfs2_xattr_set_ctxt *ctxt) 809 { 810 int ret = 0; 811 unsigned int ext_flags; 812 u32 trunc_len, cpos, phys_cpos, alloc_size; 813 u64 block; 814 815 if (old_clusters <= new_clusters) 816 return 0; 817 818 cpos = new_clusters; 819 trunc_len = old_clusters - new_clusters; 820 while (trunc_len) { 821 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, 822 &alloc_size, 823 &vb->vb_xv->xr_list, &ext_flags); 824 if (ret) { 825 mlog_errno(ret); 826 goto out; 827 } 828 829 if (alloc_size > trunc_len) 830 alloc_size = trunc_len; 831 832 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 833 phys_cpos, alloc_size, 834 ext_flags, ctxt); 835 if (ret) { 836 mlog_errno(ret); 837 goto out; 838 } 839 840 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 841 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 842 block, alloc_size); 843 cpos += alloc_size; 844 trunc_len -= alloc_size; 845 } 846 847 out: 848 return ret; 849 } 850 851 static int ocfs2_xattr_value_truncate(struct inode *inode, 852 struct ocfs2_xattr_value_buf *vb, 853 int len, 854 struct ocfs2_xattr_set_ctxt *ctxt) 855 { 856 int ret; 857 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 858 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 859 860 if (new_clusters == old_clusters) 861 return 0; 862 863 if (new_clusters > old_clusters) 864 ret = ocfs2_xattr_extend_allocation(inode, 865 new_clusters - old_clusters, 866 vb, ctxt); 867 else 868 ret = ocfs2_xattr_shrink_size(inode, 869 old_clusters, new_clusters, 870 vb, ctxt); 871 872 return ret; 873 } 874 875 static int ocfs2_xattr_list_entry(struct super_block *sb, 876 char *buffer, size_t size, 877 size_t *result, int type, 878 const char *name, int name_len) 879 { 880 char *p = buffer + *result; 881 const char *prefix; 882 int prefix_len; 883 int total_len; 884 885 switch(type) { 886 case OCFS2_XATTR_INDEX_USER: 887 if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 888 return 0; 889 break; 890 891 case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS: 892 case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT: 893 if (!(sb->s_flags & SB_POSIXACL)) 894 return 0; 895 break; 896 897 case OCFS2_XATTR_INDEX_TRUSTED: 898 if (!capable(CAP_SYS_ADMIN)) 899 return 0; 900 break; 901 } 902 903 prefix = ocfs2_xattr_prefix(type); 904 if (!prefix) 905 return 0; 906 prefix_len = strlen(prefix); 907 total_len = prefix_len + name_len + 1; 908 *result += total_len; 909 910 /* we are just looking for how big our buffer needs to be */ 911 if (!size) 912 return 0; 913 914 if (*result > size) 915 return -ERANGE; 916 917 memcpy(p, prefix, prefix_len); 918 memcpy(p + prefix_len, name, name_len); 919 p[prefix_len + name_len] = '\0'; 920 921 return 0; 922 } 923 924 static int ocfs2_xattr_list_entries(struct inode *inode, 925 struct ocfs2_xattr_header *header, 926 char *buffer, size_t buffer_size) 927 { 928 size_t result = 0; 929 int i, type, ret; 930 const char *name; 931 932 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 933 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 934 type = ocfs2_xattr_get_type(entry); 935 name = (const char *)header + 936 le16_to_cpu(entry->xe_name_offset); 937 938 ret = ocfs2_xattr_list_entry(inode->i_sb, 939 buffer, buffer_size, 940 &result, type, name, 941 entry->xe_name_len); 942 if (ret) 943 return ret; 944 } 945 946 return result; 947 } 948 949 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 950 struct ocfs2_dinode *di) 951 { 952 struct ocfs2_xattr_header *xh; 953 int i; 954 955 xh = (struct ocfs2_xattr_header *) 956 ((void *)di + inode->i_sb->s_blocksize - 957 le16_to_cpu(di->i_xattr_inline_size)); 958 959 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 960 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 961 return 1; 962 963 return 0; 964 } 965 966 static int ocfs2_xattr_ibody_list(struct inode *inode, 967 struct ocfs2_dinode *di, 968 char *buffer, 969 size_t buffer_size) 970 { 971 struct ocfs2_xattr_header *header = NULL; 972 struct ocfs2_inode_info *oi = OCFS2_I(inode); 973 int ret = 0; 974 975 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 976 return ret; 977 978 header = (struct ocfs2_xattr_header *) 979 ((void *)di + inode->i_sb->s_blocksize - 980 le16_to_cpu(di->i_xattr_inline_size)); 981 982 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 983 984 return ret; 985 } 986 987 static int ocfs2_xattr_block_list(struct inode *inode, 988 struct ocfs2_dinode *di, 989 char *buffer, 990 size_t buffer_size) 991 { 992 struct buffer_head *blk_bh = NULL; 993 struct ocfs2_xattr_block *xb; 994 int ret = 0; 995 996 if (!di->i_xattr_loc) 997 return ret; 998 999 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 1000 &blk_bh); 1001 if (ret < 0) { 1002 mlog_errno(ret); 1003 return ret; 1004 } 1005 1006 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1007 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1008 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 1009 ret = ocfs2_xattr_list_entries(inode, header, 1010 buffer, buffer_size); 1011 } else 1012 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1013 buffer, buffer_size); 1014 1015 brelse(blk_bh); 1016 1017 return ret; 1018 } 1019 1020 ssize_t ocfs2_listxattr(struct dentry *dentry, 1021 char *buffer, 1022 size_t size) 1023 { 1024 int ret = 0, i_ret = 0, b_ret = 0; 1025 struct buffer_head *di_bh = NULL; 1026 struct ocfs2_dinode *di = NULL; 1027 struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry)); 1028 1029 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) 1030 return -EOPNOTSUPP; 1031 1032 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1033 return ret; 1034 1035 ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0); 1036 if (ret < 0) { 1037 mlog_errno(ret); 1038 return ret; 1039 } 1040 1041 di = (struct ocfs2_dinode *)di_bh->b_data; 1042 1043 down_read(&oi->ip_xattr_sem); 1044 i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size); 1045 if (i_ret < 0) 1046 b_ret = 0; 1047 else { 1048 if (buffer) { 1049 buffer += i_ret; 1050 size -= i_ret; 1051 } 1052 b_ret = ocfs2_xattr_block_list(d_inode(dentry), di, 1053 buffer, size); 1054 if (b_ret < 0) 1055 i_ret = 0; 1056 } 1057 up_read(&oi->ip_xattr_sem); 1058 ocfs2_inode_unlock(d_inode(dentry), 0); 1059 1060 brelse(di_bh); 1061 1062 return i_ret + b_ret; 1063 } 1064 1065 static int ocfs2_xattr_find_entry(struct inode *inode, int name_index, 1066 const char *name, 1067 struct ocfs2_xattr_search *xs) 1068 { 1069 struct ocfs2_xattr_entry *entry; 1070 size_t name_len; 1071 int i, name_offset, cmp = 1; 1072 1073 if (name == NULL) 1074 return -EINVAL; 1075 1076 name_len = strlen(name); 1077 entry = xs->here; 1078 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 1079 if ((void *)entry >= xs->end) { 1080 ocfs2_error(inode->i_sb, "corrupted xattr entries"); 1081 return -EFSCORRUPTED; 1082 } 1083 cmp = name_index - ocfs2_xattr_get_type(entry); 1084 if (!cmp) 1085 cmp = name_len - entry->xe_name_len; 1086 if (!cmp) { 1087 name_offset = le16_to_cpu(entry->xe_name_offset); 1088 if ((xs->base + name_offset + name_len) > xs->end) { 1089 ocfs2_error(inode->i_sb, 1090 "corrupted xattr entries"); 1091 return -EFSCORRUPTED; 1092 } 1093 cmp = memcmp(name, (xs->base + name_offset), name_len); 1094 } 1095 if (cmp == 0) 1096 break; 1097 entry += 1; 1098 } 1099 xs->here = entry; 1100 1101 return cmp ? -ENODATA : 0; 1102 } 1103 1104 static int ocfs2_xattr_get_value_outside(struct inode *inode, 1105 struct ocfs2_xattr_value_root *xv, 1106 void *buffer, 1107 size_t len) 1108 { 1109 u32 cpos, p_cluster, num_clusters, bpc, clusters; 1110 u64 blkno; 1111 int i, ret = 0; 1112 size_t cplen, blocksize; 1113 struct buffer_head *bh = NULL; 1114 struct ocfs2_extent_list *el; 1115 1116 el = &xv->xr_list; 1117 clusters = le32_to_cpu(xv->xr_clusters); 1118 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1119 blocksize = inode->i_sb->s_blocksize; 1120 1121 cpos = 0; 1122 while (cpos < clusters) { 1123 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1124 &num_clusters, el, NULL); 1125 if (ret) { 1126 mlog_errno(ret); 1127 goto out; 1128 } 1129 1130 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1131 /* Copy ocfs2_xattr_value */ 1132 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1133 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1134 &bh, NULL); 1135 if (ret) { 1136 mlog_errno(ret); 1137 goto out; 1138 } 1139 1140 cplen = len >= blocksize ? blocksize : len; 1141 memcpy(buffer, bh->b_data, cplen); 1142 len -= cplen; 1143 buffer += cplen; 1144 1145 brelse(bh); 1146 bh = NULL; 1147 if (len == 0) 1148 break; 1149 } 1150 cpos += num_clusters; 1151 } 1152 out: 1153 return ret; 1154 } 1155 1156 static int ocfs2_xattr_ibody_get(struct inode *inode, 1157 int name_index, 1158 const char *name, 1159 void *buffer, 1160 size_t buffer_size, 1161 struct ocfs2_xattr_search *xs) 1162 { 1163 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1164 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1165 struct ocfs2_xattr_value_root *xv; 1166 size_t size; 1167 int ret = 0; 1168 1169 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 1170 return -ENODATA; 1171 1172 xs->end = (void *)di + inode->i_sb->s_blocksize; 1173 xs->header = (struct ocfs2_xattr_header *) 1174 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 1175 xs->base = (void *)xs->header; 1176 xs->here = xs->header->xh_entries; 1177 1178 ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); 1179 if (ret) 1180 return ret; 1181 size = le64_to_cpu(xs->here->xe_value_size); 1182 if (buffer) { 1183 if (size > buffer_size) 1184 return -ERANGE; 1185 if (ocfs2_xattr_is_local(xs->here)) { 1186 memcpy(buffer, (void *)xs->base + 1187 le16_to_cpu(xs->here->xe_name_offset) + 1188 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); 1189 } else { 1190 xv = (struct ocfs2_xattr_value_root *) 1191 (xs->base + le16_to_cpu( 1192 xs->here->xe_name_offset) + 1193 OCFS2_XATTR_SIZE(xs->here->xe_name_len)); 1194 ret = ocfs2_xattr_get_value_outside(inode, xv, 1195 buffer, size); 1196 if (ret < 0) { 1197 mlog_errno(ret); 1198 return ret; 1199 } 1200 } 1201 } 1202 1203 return size; 1204 } 1205 1206 static int ocfs2_xattr_block_get(struct inode *inode, 1207 int name_index, 1208 const char *name, 1209 void *buffer, 1210 size_t buffer_size, 1211 struct ocfs2_xattr_search *xs) 1212 { 1213 struct ocfs2_xattr_block *xb; 1214 struct ocfs2_xattr_value_root *xv; 1215 size_t size; 1216 int ret = -ENODATA, name_offset, name_len, i; 1217 int block_off; 1218 1219 xs->bucket = ocfs2_xattr_bucket_new(inode); 1220 if (!xs->bucket) { 1221 ret = -ENOMEM; 1222 mlog_errno(ret); 1223 goto cleanup; 1224 } 1225 1226 ret = ocfs2_xattr_block_find(inode, name_index, name, xs); 1227 if (ret) { 1228 mlog_errno(ret); 1229 goto cleanup; 1230 } 1231 1232 if (xs->not_found) { 1233 ret = -ENODATA; 1234 goto cleanup; 1235 } 1236 1237 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 1238 size = le64_to_cpu(xs->here->xe_value_size); 1239 if (buffer) { 1240 ret = -ERANGE; 1241 if (size > buffer_size) 1242 goto cleanup; 1243 1244 name_offset = le16_to_cpu(xs->here->xe_name_offset); 1245 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); 1246 i = xs->here - xs->header->xh_entries; 1247 1248 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1249 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 1250 bucket_xh(xs->bucket), 1251 i, 1252 &block_off, 1253 &name_offset); 1254 if (ret) { 1255 mlog_errno(ret); 1256 goto cleanup; 1257 } 1258 xs->base = bucket_block(xs->bucket, block_off); 1259 } 1260 if (ocfs2_xattr_is_local(xs->here)) { 1261 memcpy(buffer, (void *)xs->base + 1262 name_offset + name_len, size); 1263 } else { 1264 xv = (struct ocfs2_xattr_value_root *) 1265 (xs->base + name_offset + name_len); 1266 ret = ocfs2_xattr_get_value_outside(inode, xv, 1267 buffer, size); 1268 if (ret < 0) { 1269 mlog_errno(ret); 1270 goto cleanup; 1271 } 1272 } 1273 } 1274 ret = size; 1275 cleanup: 1276 ocfs2_xattr_bucket_free(xs->bucket); 1277 1278 brelse(xs->xattr_bh); 1279 xs->xattr_bh = NULL; 1280 return ret; 1281 } 1282 1283 int ocfs2_xattr_get_nolock(struct inode *inode, 1284 struct buffer_head *di_bh, 1285 int name_index, 1286 const char *name, 1287 void *buffer, 1288 size_t buffer_size) 1289 { 1290 int ret; 1291 struct ocfs2_dinode *di = NULL; 1292 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1293 struct ocfs2_xattr_search xis = { 1294 .not_found = -ENODATA, 1295 }; 1296 struct ocfs2_xattr_search xbs = { 1297 .not_found = -ENODATA, 1298 }; 1299 1300 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 1301 return -EOPNOTSUPP; 1302 1303 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1304 return -ENODATA; 1305 1306 xis.inode_bh = xbs.inode_bh = di_bh; 1307 di = (struct ocfs2_dinode *)di_bh->b_data; 1308 1309 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 1310 buffer_size, &xis); 1311 if (ret == -ENODATA && di->i_xattr_loc) 1312 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1313 buffer_size, &xbs); 1314 1315 return ret; 1316 } 1317 1318 /* ocfs2_xattr_get() 1319 * 1320 * Copy an extended attribute into the buffer provided. 1321 * Buffer is NULL to compute the size of buffer required. 1322 */ 1323 static int ocfs2_xattr_get(struct inode *inode, 1324 int name_index, 1325 const char *name, 1326 void *buffer, 1327 size_t buffer_size) 1328 { 1329 int ret, had_lock; 1330 struct buffer_head *di_bh = NULL; 1331 struct ocfs2_lock_holder oh; 1332 1333 had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh); 1334 if (had_lock < 0) { 1335 mlog_errno(had_lock); 1336 return had_lock; 1337 } 1338 down_read(&OCFS2_I(inode)->ip_xattr_sem); 1339 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, 1340 name, buffer, buffer_size); 1341 up_read(&OCFS2_I(inode)->ip_xattr_sem); 1342 1343 ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); 1344 1345 brelse(di_bh); 1346 1347 return ret; 1348 } 1349 1350 static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1351 handle_t *handle, 1352 struct ocfs2_xattr_value_buf *vb, 1353 const void *value, 1354 int value_len) 1355 { 1356 int ret = 0, i, cp_len; 1357 u16 blocksize = inode->i_sb->s_blocksize; 1358 u32 p_cluster, num_clusters; 1359 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1360 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1361 u64 blkno; 1362 struct buffer_head *bh = NULL; 1363 unsigned int ext_flags; 1364 struct ocfs2_xattr_value_root *xv = vb->vb_xv; 1365 1366 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); 1367 1368 while (cpos < clusters) { 1369 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1370 &num_clusters, &xv->xr_list, 1371 &ext_flags); 1372 if (ret) { 1373 mlog_errno(ret); 1374 goto out; 1375 } 1376 1377 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 1378 1379 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1380 1381 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1382 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1383 &bh, NULL); 1384 if (ret) { 1385 mlog_errno(ret); 1386 goto out; 1387 } 1388 1389 ret = ocfs2_journal_access(handle, 1390 INODE_CACHE(inode), 1391 bh, 1392 OCFS2_JOURNAL_ACCESS_WRITE); 1393 if (ret < 0) { 1394 mlog_errno(ret); 1395 goto out; 1396 } 1397 1398 cp_len = value_len > blocksize ? blocksize : value_len; 1399 memcpy(bh->b_data, value, cp_len); 1400 value_len -= cp_len; 1401 value += cp_len; 1402 if (cp_len < blocksize) 1403 memset(bh->b_data + cp_len, 0, 1404 blocksize - cp_len); 1405 1406 ocfs2_journal_dirty(handle, bh); 1407 brelse(bh); 1408 bh = NULL; 1409 1410 /* 1411 * XXX: do we need to empty all the following 1412 * blocks in this cluster? 1413 */ 1414 if (!value_len) 1415 break; 1416 } 1417 cpos += num_clusters; 1418 } 1419 out: 1420 brelse(bh); 1421 1422 return ret; 1423 } 1424 1425 static int ocfs2_xa_check_space_helper(int needed_space, int free_start, 1426 int num_entries) 1427 { 1428 int free_space; 1429 1430 if (!needed_space) 1431 return 0; 1432 1433 free_space = free_start - 1434 sizeof(struct ocfs2_xattr_header) - 1435 (num_entries * sizeof(struct ocfs2_xattr_entry)) - 1436 OCFS2_XATTR_HEADER_GAP; 1437 if (free_space < 0) 1438 return -EIO; 1439 if (free_space < needed_space) 1440 return -ENOSPC; 1441 1442 return 0; 1443 } 1444 1445 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, 1446 int type) 1447 { 1448 return loc->xl_ops->xlo_journal_access(handle, loc, type); 1449 } 1450 1451 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) 1452 { 1453 loc->xl_ops->xlo_journal_dirty(handle, loc); 1454 } 1455 1456 /* Give a pointer into the storage for the given offset */ 1457 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) 1458 { 1459 BUG_ON(offset >= loc->xl_size); 1460 return loc->xl_ops->xlo_offset_pointer(loc, offset); 1461 } 1462 1463 /* 1464 * Wipe the name+value pair and allow the storage to reclaim it. This 1465 * must be followed by either removal of the entry or a call to 1466 * ocfs2_xa_add_namevalue(). 1467 */ 1468 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) 1469 { 1470 loc->xl_ops->xlo_wipe_namevalue(loc); 1471 } 1472 1473 /* 1474 * Find lowest offset to a name+value pair. This is the start of our 1475 * downward-growing free space. 1476 */ 1477 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) 1478 { 1479 return loc->xl_ops->xlo_get_free_start(loc); 1480 } 1481 1482 /* Can we reuse loc->xl_entry for xi? */ 1483 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, 1484 struct ocfs2_xattr_info *xi) 1485 { 1486 return loc->xl_ops->xlo_can_reuse(loc, xi); 1487 } 1488 1489 /* How much free space is needed to set the new value */ 1490 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, 1491 struct ocfs2_xattr_info *xi) 1492 { 1493 return loc->xl_ops->xlo_check_space(loc, xi); 1494 } 1495 1496 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1497 { 1498 loc->xl_ops->xlo_add_entry(loc, name_hash); 1499 loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); 1500 /* 1501 * We can't leave the new entry's xe_name_offset at zero or 1502 * add_namevalue() will go nuts. We set it to the size of our 1503 * storage so that it can never be less than any other entry. 1504 */ 1505 loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); 1506 } 1507 1508 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, 1509 struct ocfs2_xattr_info *xi) 1510 { 1511 int size = namevalue_size_xi(xi); 1512 int nameval_offset; 1513 char *nameval_buf; 1514 1515 loc->xl_ops->xlo_add_namevalue(loc, size); 1516 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1517 loc->xl_entry->xe_name_len = xi->xi_name_len; 1518 ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); 1519 ocfs2_xattr_set_local(loc->xl_entry, 1520 xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); 1521 1522 nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1523 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 1524 memset(nameval_buf, 0, size); 1525 memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); 1526 } 1527 1528 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, 1529 struct ocfs2_xattr_value_buf *vb) 1530 { 1531 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1532 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1533 1534 /* Value bufs are for value trees */ 1535 BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); 1536 BUG_ON(namevalue_size_xe(loc->xl_entry) != 1537 (name_size + OCFS2_XATTR_ROOT_SIZE)); 1538 1539 loc->xl_ops->xlo_fill_value_buf(loc, vb); 1540 vb->vb_xv = 1541 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, 1542 nameval_offset + 1543 name_size); 1544 } 1545 1546 static int ocfs2_xa_block_journal_access(handle_t *handle, 1547 struct ocfs2_xa_loc *loc, int type) 1548 { 1549 struct buffer_head *bh = loc->xl_storage; 1550 ocfs2_journal_access_func access; 1551 1552 if (loc->xl_size == (bh->b_size - 1553 offsetof(struct ocfs2_xattr_block, 1554 xb_attrs.xb_header))) 1555 access = ocfs2_journal_access_xb; 1556 else 1557 access = ocfs2_journal_access_di; 1558 return access(handle, INODE_CACHE(loc->xl_inode), bh, type); 1559 } 1560 1561 static void ocfs2_xa_block_journal_dirty(handle_t *handle, 1562 struct ocfs2_xa_loc *loc) 1563 { 1564 struct buffer_head *bh = loc->xl_storage; 1565 1566 ocfs2_journal_dirty(handle, bh); 1567 } 1568 1569 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, 1570 int offset) 1571 { 1572 return (char *)loc->xl_header + offset; 1573 } 1574 1575 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, 1576 struct ocfs2_xattr_info *xi) 1577 { 1578 /* 1579 * Block storage is strict. If the sizes aren't exact, we will 1580 * remove the old one and reinsert the new. 1581 */ 1582 return namevalue_size_xe(loc->xl_entry) == 1583 namevalue_size_xi(xi); 1584 } 1585 1586 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) 1587 { 1588 struct ocfs2_xattr_header *xh = loc->xl_header; 1589 int i, count = le16_to_cpu(xh->xh_count); 1590 int offset, free_start = loc->xl_size; 1591 1592 for (i = 0; i < count; i++) { 1593 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1594 if (offset < free_start) 1595 free_start = offset; 1596 } 1597 1598 return free_start; 1599 } 1600 1601 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, 1602 struct ocfs2_xattr_info *xi) 1603 { 1604 int count = le16_to_cpu(loc->xl_header->xh_count); 1605 int free_start = ocfs2_xa_get_free_start(loc); 1606 int needed_space = ocfs2_xi_entry_usage(xi); 1607 1608 /* 1609 * Block storage will reclaim the original entry before inserting 1610 * the new value, so we only need the difference. If the new 1611 * entry is smaller than the old one, we don't need anything. 1612 */ 1613 if (loc->xl_entry) { 1614 /* Don't need space if we're reusing! */ 1615 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1616 needed_space = 0; 1617 else 1618 needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); 1619 } 1620 if (needed_space < 0) 1621 needed_space = 0; 1622 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1623 } 1624 1625 /* 1626 * Block storage for xattrs keeps the name+value pairs compacted. When 1627 * we remove one, we have to shift any that preceded it towards the end. 1628 */ 1629 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) 1630 { 1631 int i, offset; 1632 int namevalue_offset, first_namevalue_offset, namevalue_size; 1633 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1634 struct ocfs2_xattr_header *xh = loc->xl_header; 1635 int count = le16_to_cpu(xh->xh_count); 1636 1637 namevalue_offset = le16_to_cpu(entry->xe_name_offset); 1638 namevalue_size = namevalue_size_xe(entry); 1639 first_namevalue_offset = ocfs2_xa_get_free_start(loc); 1640 1641 /* Shift the name+value pairs */ 1642 memmove((char *)xh + first_namevalue_offset + namevalue_size, 1643 (char *)xh + first_namevalue_offset, 1644 namevalue_offset - first_namevalue_offset); 1645 memset((char *)xh + first_namevalue_offset, 0, namevalue_size); 1646 1647 /* Now tell xh->xh_entries about it */ 1648 for (i = 0; i < count; i++) { 1649 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1650 if (offset <= namevalue_offset) 1651 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1652 namevalue_size); 1653 } 1654 1655 /* 1656 * Note that we don't update xh_free_start or xh_name_value_len 1657 * because they're not used in block-stored xattrs. 1658 */ 1659 } 1660 1661 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1662 { 1663 int count = le16_to_cpu(loc->xl_header->xh_count); 1664 loc->xl_entry = &(loc->xl_header->xh_entries[count]); 1665 le16_add_cpu(&loc->xl_header->xh_count, 1); 1666 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1667 } 1668 1669 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1670 { 1671 int free_start = ocfs2_xa_get_free_start(loc); 1672 1673 loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); 1674 } 1675 1676 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, 1677 struct ocfs2_xattr_value_buf *vb) 1678 { 1679 struct buffer_head *bh = loc->xl_storage; 1680 1681 if (loc->xl_size == (bh->b_size - 1682 offsetof(struct ocfs2_xattr_block, 1683 xb_attrs.xb_header))) 1684 vb->vb_access = ocfs2_journal_access_xb; 1685 else 1686 vb->vb_access = ocfs2_journal_access_di; 1687 vb->vb_bh = bh; 1688 } 1689 1690 /* 1691 * Operations for xattrs stored in blocks. This includes inline inode 1692 * storage and unindexed ocfs2_xattr_blocks. 1693 */ 1694 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { 1695 .xlo_journal_access = ocfs2_xa_block_journal_access, 1696 .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, 1697 .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, 1698 .xlo_check_space = ocfs2_xa_block_check_space, 1699 .xlo_can_reuse = ocfs2_xa_block_can_reuse, 1700 .xlo_get_free_start = ocfs2_xa_block_get_free_start, 1701 .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, 1702 .xlo_add_entry = ocfs2_xa_block_add_entry, 1703 .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, 1704 .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, 1705 }; 1706 1707 static int ocfs2_xa_bucket_journal_access(handle_t *handle, 1708 struct ocfs2_xa_loc *loc, int type) 1709 { 1710 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1711 1712 return ocfs2_xattr_bucket_journal_access(handle, bucket, type); 1713 } 1714 1715 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, 1716 struct ocfs2_xa_loc *loc) 1717 { 1718 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1719 1720 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 1721 } 1722 1723 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, 1724 int offset) 1725 { 1726 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1727 int block, block_offset; 1728 1729 /* The header is at the front of the bucket */ 1730 block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; 1731 block_offset = offset % loc->xl_inode->i_sb->s_blocksize; 1732 1733 return bucket_block(bucket, block) + block_offset; 1734 } 1735 1736 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, 1737 struct ocfs2_xattr_info *xi) 1738 { 1739 return namevalue_size_xe(loc->xl_entry) >= 1740 namevalue_size_xi(xi); 1741 } 1742 1743 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) 1744 { 1745 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1746 return le16_to_cpu(bucket_xh(bucket)->xh_free_start); 1747 } 1748 1749 static int ocfs2_bucket_align_free_start(struct super_block *sb, 1750 int free_start, int size) 1751 { 1752 /* 1753 * We need to make sure that the name+value pair fits within 1754 * one block. 1755 */ 1756 if (((free_start - size) >> sb->s_blocksize_bits) != 1757 ((free_start - 1) >> sb->s_blocksize_bits)) 1758 free_start -= free_start % sb->s_blocksize; 1759 1760 return free_start; 1761 } 1762 1763 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, 1764 struct ocfs2_xattr_info *xi) 1765 { 1766 int rc; 1767 int count = le16_to_cpu(loc->xl_header->xh_count); 1768 int free_start = ocfs2_xa_get_free_start(loc); 1769 int needed_space = ocfs2_xi_entry_usage(xi); 1770 int size = namevalue_size_xi(xi); 1771 struct super_block *sb = loc->xl_inode->i_sb; 1772 1773 /* 1774 * Bucket storage does not reclaim name+value pairs it cannot 1775 * reuse. They live as holes until the bucket fills, and then 1776 * the bucket is defragmented. However, the bucket can reclaim 1777 * the ocfs2_xattr_entry. 1778 */ 1779 if (loc->xl_entry) { 1780 /* Don't need space if we're reusing! */ 1781 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1782 needed_space = 0; 1783 else 1784 needed_space -= sizeof(struct ocfs2_xattr_entry); 1785 } 1786 BUG_ON(needed_space < 0); 1787 1788 if (free_start < size) { 1789 if (needed_space) 1790 return -ENOSPC; 1791 } else { 1792 /* 1793 * First we check if it would fit in the first place. 1794 * Below, we align the free start to a block. This may 1795 * slide us below the minimum gap. By checking unaligned 1796 * first, we avoid that error. 1797 */ 1798 rc = ocfs2_xa_check_space_helper(needed_space, free_start, 1799 count); 1800 if (rc) 1801 return rc; 1802 free_start = ocfs2_bucket_align_free_start(sb, free_start, 1803 size); 1804 } 1805 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1806 } 1807 1808 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) 1809 { 1810 le16_add_cpu(&loc->xl_header->xh_name_value_len, 1811 -namevalue_size_xe(loc->xl_entry)); 1812 } 1813 1814 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1815 { 1816 struct ocfs2_xattr_header *xh = loc->xl_header; 1817 int count = le16_to_cpu(xh->xh_count); 1818 int low = 0, high = count - 1, tmp; 1819 struct ocfs2_xattr_entry *tmp_xe; 1820 1821 /* 1822 * We keep buckets sorted by name_hash, so we need to find 1823 * our insert place. 1824 */ 1825 while (low <= high && count) { 1826 tmp = (low + high) / 2; 1827 tmp_xe = &xh->xh_entries[tmp]; 1828 1829 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 1830 low = tmp + 1; 1831 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) 1832 high = tmp - 1; 1833 else { 1834 low = tmp; 1835 break; 1836 } 1837 } 1838 1839 if (low != count) 1840 memmove(&xh->xh_entries[low + 1], 1841 &xh->xh_entries[low], 1842 ((count - low) * sizeof(struct ocfs2_xattr_entry))); 1843 1844 le16_add_cpu(&xh->xh_count, 1); 1845 loc->xl_entry = &xh->xh_entries[low]; 1846 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1847 } 1848 1849 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1850 { 1851 int free_start = ocfs2_xa_get_free_start(loc); 1852 struct ocfs2_xattr_header *xh = loc->xl_header; 1853 struct super_block *sb = loc->xl_inode->i_sb; 1854 int nameval_offset; 1855 1856 free_start = ocfs2_bucket_align_free_start(sb, free_start, size); 1857 nameval_offset = free_start - size; 1858 loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); 1859 xh->xh_free_start = cpu_to_le16(nameval_offset); 1860 le16_add_cpu(&xh->xh_name_value_len, size); 1861 1862 } 1863 1864 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, 1865 struct ocfs2_xattr_value_buf *vb) 1866 { 1867 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1868 struct super_block *sb = loc->xl_inode->i_sb; 1869 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1870 int size = namevalue_size_xe(loc->xl_entry); 1871 int block_offset = nameval_offset >> sb->s_blocksize_bits; 1872 1873 /* Values are not allowed to straddle block boundaries */ 1874 BUG_ON(block_offset != 1875 ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); 1876 /* We expect the bucket to be filled in */ 1877 BUG_ON(!bucket->bu_bhs[block_offset]); 1878 1879 vb->vb_access = ocfs2_journal_access; 1880 vb->vb_bh = bucket->bu_bhs[block_offset]; 1881 } 1882 1883 /* Operations for xattrs stored in buckets. */ 1884 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { 1885 .xlo_journal_access = ocfs2_xa_bucket_journal_access, 1886 .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, 1887 .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, 1888 .xlo_check_space = ocfs2_xa_bucket_check_space, 1889 .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, 1890 .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, 1891 .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, 1892 .xlo_add_entry = ocfs2_xa_bucket_add_entry, 1893 .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, 1894 .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, 1895 }; 1896 1897 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) 1898 { 1899 struct ocfs2_xattr_value_buf vb; 1900 1901 if (ocfs2_xattr_is_local(loc->xl_entry)) 1902 return 0; 1903 1904 ocfs2_xa_fill_value_buf(loc, &vb); 1905 return le32_to_cpu(vb.vb_xv->xr_clusters); 1906 } 1907 1908 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, 1909 struct ocfs2_xattr_set_ctxt *ctxt) 1910 { 1911 int trunc_rc, access_rc; 1912 struct ocfs2_xattr_value_buf vb; 1913 1914 ocfs2_xa_fill_value_buf(loc, &vb); 1915 trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1916 ctxt); 1917 1918 /* 1919 * The caller of ocfs2_xa_value_truncate() has already called 1920 * ocfs2_xa_journal_access on the loc. However, The truncate code 1921 * calls ocfs2_extend_trans(). This may commit the previous 1922 * transaction and open a new one. If this is a bucket, truncate 1923 * could leave only vb->vb_bh set up for journaling. Meanwhile, 1924 * the caller is expecting to dirty the entire bucket. So we must 1925 * reset the journal work. We do this even if truncate has failed, 1926 * as it could have failed after committing the extend. 1927 */ 1928 access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1929 OCFS2_JOURNAL_ACCESS_WRITE); 1930 1931 /* Errors in truncate take precedence */ 1932 return trunc_rc ? trunc_rc : access_rc; 1933 } 1934 1935 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1936 { 1937 int index, count; 1938 struct ocfs2_xattr_header *xh = loc->xl_header; 1939 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1940 1941 ocfs2_xa_wipe_namevalue(loc); 1942 loc->xl_entry = NULL; 1943 1944 le16_add_cpu(&xh->xh_count, -1); 1945 count = le16_to_cpu(xh->xh_count); 1946 1947 /* 1948 * Only zero out the entry if there are more remaining. This is 1949 * important for an empty bucket, as it keeps track of the 1950 * bucket's hash value. It doesn't hurt empty block storage. 1951 */ 1952 if (count) { 1953 index = ((char *)entry - (char *)&xh->xh_entries) / 1954 sizeof(struct ocfs2_xattr_entry); 1955 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1956 (count - index) * sizeof(struct ocfs2_xattr_entry)); 1957 memset(&xh->xh_entries[count], 0, 1958 sizeof(struct ocfs2_xattr_entry)); 1959 } 1960 } 1961 1962 /* 1963 * If we have a problem adjusting the size of an external value during 1964 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 1965 * in an intermediate state. For example, the value may be partially 1966 * truncated. 1967 * 1968 * If the value tree hasn't changed, the extend/truncate went nowhere. 1969 * We have nothing to do. The caller can treat it as a straight error. 1970 * 1971 * If the value tree got partially truncated, we now have a corrupted 1972 * extended attribute. We're going to wipe its entry and leak the 1973 * clusters. Better to leak some storage than leave a corrupt entry. 1974 * 1975 * If the value tree grew, it obviously didn't grow enough for the 1976 * new entry. We're not going to try and reclaim those clusters either. 1977 * If there was already an external value there (orig_clusters != 0), 1978 * the new clusters are attached safely and we can just leave the old 1979 * value in place. If there was no external value there, we remove 1980 * the entry. 1981 * 1982 * This way, the xattr block we store in the journal will be consistent. 1983 * If the size change broke because of the journal, no changes will hit 1984 * disk anyway. 1985 */ 1986 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 1987 const char *what, 1988 unsigned int orig_clusters) 1989 { 1990 unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 1991 char *nameval_buf = ocfs2_xa_offset_pointer(loc, 1992 le16_to_cpu(loc->xl_entry->xe_name_offset)); 1993 1994 if (new_clusters < orig_clusters) { 1995 mlog(ML_ERROR, 1996 "Partial truncate while %s xattr %.*s. Leaking " 1997 "%u clusters and removing the entry\n", 1998 what, loc->xl_entry->xe_name_len, nameval_buf, 1999 orig_clusters - new_clusters); 2000 ocfs2_xa_remove_entry(loc); 2001 } else if (!orig_clusters) { 2002 mlog(ML_ERROR, 2003 "Unable to allocate an external value for xattr " 2004 "%.*s safely. Leaking %u clusters and removing the " 2005 "entry\n", 2006 loc->xl_entry->xe_name_len, nameval_buf, 2007 new_clusters - orig_clusters); 2008 ocfs2_xa_remove_entry(loc); 2009 } else if (new_clusters > orig_clusters) 2010 mlog(ML_ERROR, 2011 "Unable to grow xattr %.*s safely. %u new clusters " 2012 "have been added, but the value will not be " 2013 "modified\n", 2014 loc->xl_entry->xe_name_len, nameval_buf, 2015 new_clusters - orig_clusters); 2016 } 2017 2018 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 2019 struct ocfs2_xattr_set_ctxt *ctxt) 2020 { 2021 int rc = 0; 2022 unsigned int orig_clusters; 2023 2024 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2025 orig_clusters = ocfs2_xa_value_clusters(loc); 2026 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2027 if (rc) { 2028 mlog_errno(rc); 2029 /* 2030 * Since this is remove, we can return 0 if 2031 * ocfs2_xa_cleanup_value_truncate() is going to 2032 * wipe the entry anyway. So we check the 2033 * cluster count as well. 2034 */ 2035 if (orig_clusters != ocfs2_xa_value_clusters(loc)) 2036 rc = 0; 2037 ocfs2_xa_cleanup_value_truncate(loc, "removing", 2038 orig_clusters); 2039 goto out; 2040 } 2041 } 2042 2043 ocfs2_xa_remove_entry(loc); 2044 2045 out: 2046 return rc; 2047 } 2048 2049 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 2050 { 2051 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 2052 char *nameval_buf; 2053 2054 nameval_buf = ocfs2_xa_offset_pointer(loc, 2055 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2056 memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 2057 } 2058 2059 /* 2060 * Take an existing entry and make it ready for the new value. This 2061 * won't allocate space, but it may free space. It should be ready for 2062 * ocfs2_xa_prepare_entry() to finish the work. 2063 */ 2064 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc, 2065 struct ocfs2_xattr_info *xi, 2066 struct ocfs2_xattr_set_ctxt *ctxt) 2067 { 2068 int rc = 0; 2069 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2070 unsigned int orig_clusters; 2071 char *nameval_buf; 2072 int xe_local = ocfs2_xattr_is_local(loc->xl_entry); 2073 int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE; 2074 2075 BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) != 2076 name_size); 2077 2078 nameval_buf = ocfs2_xa_offset_pointer(loc, 2079 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2080 if (xe_local) { 2081 memset(nameval_buf + name_size, 0, 2082 namevalue_size_xe(loc->xl_entry) - name_size); 2083 if (!xi_local) 2084 ocfs2_xa_install_value_root(loc); 2085 } else { 2086 orig_clusters = ocfs2_xa_value_clusters(loc); 2087 if (xi_local) { 2088 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2089 if (rc < 0) 2090 mlog_errno(rc); 2091 else 2092 memset(nameval_buf + name_size, 0, 2093 namevalue_size_xe(loc->xl_entry) - 2094 name_size); 2095 } else if (le64_to_cpu(loc->xl_entry->xe_value_size) > 2096 xi->xi_value_len) { 2097 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, 2098 ctxt); 2099 if (rc < 0) 2100 mlog_errno(rc); 2101 } 2102 2103 if (rc) { 2104 ocfs2_xa_cleanup_value_truncate(loc, "reusing", 2105 orig_clusters); 2106 goto out; 2107 } 2108 } 2109 2110 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 2111 ocfs2_xattr_set_local(loc->xl_entry, xi_local); 2112 2113 out: 2114 return rc; 2115 } 2116 2117 /* 2118 * Prepares loc->xl_entry to receive the new xattr. This includes 2119 * properly setting up the name+value pair region. If loc->xl_entry 2120 * already exists, it will take care of modifying it appropriately. 2121 * 2122 * Note that this modifies the data. You did journal_access already, 2123 * right? 2124 */ 2125 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, 2126 struct ocfs2_xattr_info *xi, 2127 u32 name_hash, 2128 struct ocfs2_xattr_set_ctxt *ctxt) 2129 { 2130 int rc = 0; 2131 unsigned int orig_clusters; 2132 __le64 orig_value_size = 0; 2133 2134 rc = ocfs2_xa_check_space(loc, xi); 2135 if (rc) 2136 goto out; 2137 2138 if (loc->xl_entry) { 2139 if (ocfs2_xa_can_reuse_entry(loc, xi)) { 2140 orig_value_size = loc->xl_entry->xe_value_size; 2141 rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); 2142 if (rc) 2143 goto out; 2144 goto alloc_value; 2145 } 2146 2147 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2148 orig_clusters = ocfs2_xa_value_clusters(loc); 2149 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2150 if (rc) { 2151 mlog_errno(rc); 2152 ocfs2_xa_cleanup_value_truncate(loc, 2153 "overwriting", 2154 orig_clusters); 2155 goto out; 2156 } 2157 } 2158 ocfs2_xa_wipe_namevalue(loc); 2159 } else 2160 ocfs2_xa_add_entry(loc, name_hash); 2161 2162 /* 2163 * If we get here, we have a blank entry. Fill it. We grow our 2164 * name+value pair back from the end. 2165 */ 2166 ocfs2_xa_add_namevalue(loc, xi); 2167 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 2168 ocfs2_xa_install_value_root(loc); 2169 2170 alloc_value: 2171 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2172 orig_clusters = ocfs2_xa_value_clusters(loc); 2173 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); 2174 if (rc < 0) { 2175 ctxt->set_abort = 1; 2176 ocfs2_xa_cleanup_value_truncate(loc, "growing", 2177 orig_clusters); 2178 /* 2179 * If we were growing an existing value, 2180 * ocfs2_xa_cleanup_value_truncate() won't remove 2181 * the entry. We need to restore the original value 2182 * size. 2183 */ 2184 if (loc->xl_entry) { 2185 BUG_ON(!orig_value_size); 2186 loc->xl_entry->xe_value_size = orig_value_size; 2187 } 2188 mlog_errno(rc); 2189 } 2190 } 2191 2192 out: 2193 return rc; 2194 } 2195 2196 /* 2197 * Store the value portion of the name+value pair. This will skip 2198 * values that are stored externally. Their tree roots were set up 2199 * by ocfs2_xa_prepare_entry(). 2200 */ 2201 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, 2202 struct ocfs2_xattr_info *xi, 2203 struct ocfs2_xattr_set_ctxt *ctxt) 2204 { 2205 int rc = 0; 2206 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 2207 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2208 char *nameval_buf; 2209 struct ocfs2_xattr_value_buf vb; 2210 2211 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 2212 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2213 ocfs2_xa_fill_value_buf(loc, &vb); 2214 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, 2215 ctxt->handle, &vb, 2216 xi->xi_value, 2217 xi->xi_value_len); 2218 } else 2219 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); 2220 2221 return rc; 2222 } 2223 2224 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, 2225 struct ocfs2_xattr_info *xi, 2226 struct ocfs2_xattr_set_ctxt *ctxt) 2227 { 2228 int ret; 2229 u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, 2230 xi->xi_name_len); 2231 2232 ret = ocfs2_xa_journal_access(ctxt->handle, loc, 2233 OCFS2_JOURNAL_ACCESS_WRITE); 2234 if (ret) { 2235 mlog_errno(ret); 2236 goto out; 2237 } 2238 2239 /* 2240 * From here on out, everything is going to modify the buffer a 2241 * little. Errors are going to leave the xattr header in a 2242 * sane state. Thus, even with errors we dirty the sucker. 2243 */ 2244 2245 /* Don't worry, we are never called with !xi_value and !xl_entry */ 2246 if (!xi->xi_value) { 2247 ret = ocfs2_xa_remove(loc, ctxt); 2248 goto out_dirty; 2249 } 2250 2251 ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); 2252 if (ret) { 2253 if (ret != -ENOSPC) 2254 mlog_errno(ret); 2255 goto out_dirty; 2256 } 2257 2258 ret = ocfs2_xa_store_value(loc, xi, ctxt); 2259 if (ret) 2260 mlog_errno(ret); 2261 2262 out_dirty: 2263 ocfs2_xa_journal_dirty(ctxt->handle, loc); 2264 2265 out: 2266 return ret; 2267 } 2268 2269 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, 2270 struct inode *inode, 2271 struct buffer_head *bh, 2272 struct ocfs2_xattr_entry *entry) 2273 { 2274 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2275 2276 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); 2277 2278 loc->xl_inode = inode; 2279 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2280 loc->xl_storage = bh; 2281 loc->xl_entry = entry; 2282 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); 2283 loc->xl_header = 2284 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - 2285 loc->xl_size); 2286 } 2287 2288 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, 2289 struct inode *inode, 2290 struct buffer_head *bh, 2291 struct ocfs2_xattr_entry *entry) 2292 { 2293 struct ocfs2_xattr_block *xb = 2294 (struct ocfs2_xattr_block *)bh->b_data; 2295 2296 BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); 2297 2298 loc->xl_inode = inode; 2299 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2300 loc->xl_storage = bh; 2301 loc->xl_header = &(xb->xb_attrs.xb_header); 2302 loc->xl_entry = entry; 2303 loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, 2304 xb_attrs.xb_header); 2305 } 2306 2307 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, 2308 struct ocfs2_xattr_bucket *bucket, 2309 struct ocfs2_xattr_entry *entry) 2310 { 2311 loc->xl_inode = bucket->bu_inode; 2312 loc->xl_ops = &ocfs2_xa_bucket_loc_ops; 2313 loc->xl_storage = bucket; 2314 loc->xl_header = bucket_xh(bucket); 2315 loc->xl_entry = entry; 2316 loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; 2317 } 2318 2319 /* 2320 * In xattr remove, if it is stored outside and refcounted, we may have 2321 * the chance to split the refcount tree. So need the allocators. 2322 */ 2323 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, 2324 struct ocfs2_xattr_value_root *xv, 2325 struct ocfs2_caching_info *ref_ci, 2326 struct buffer_head *ref_root_bh, 2327 struct ocfs2_alloc_context **meta_ac, 2328 int *ref_credits) 2329 { 2330 int ret, meta_add = 0; 2331 u32 p_cluster, num_clusters; 2332 unsigned int ext_flags; 2333 2334 *ref_credits = 0; 2335 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 2336 &num_clusters, 2337 &xv->xr_list, 2338 &ext_flags); 2339 if (ret) { 2340 mlog_errno(ret); 2341 goto out; 2342 } 2343 2344 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 2345 goto out; 2346 2347 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, 2348 ref_root_bh, xv, 2349 &meta_add, ref_credits); 2350 if (ret) { 2351 mlog_errno(ret); 2352 goto out; 2353 } 2354 2355 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2356 meta_add, meta_ac); 2357 if (ret) 2358 mlog_errno(ret); 2359 2360 out: 2361 return ret; 2362 } 2363 2364 static int ocfs2_remove_value_outside(struct inode*inode, 2365 struct ocfs2_xattr_value_buf *vb, 2366 struct ocfs2_xattr_header *header, 2367 struct ocfs2_caching_info *ref_ci, 2368 struct buffer_head *ref_root_bh) 2369 { 2370 int ret = 0, i, ref_credits; 2371 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2372 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2373 void *val; 2374 2375 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 2376 2377 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 2378 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 2379 2380 if (ocfs2_xattr_is_local(entry)) 2381 continue; 2382 2383 val = (void *)header + 2384 le16_to_cpu(entry->xe_name_offset); 2385 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2386 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2387 2388 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2389 ref_ci, ref_root_bh, 2390 &ctxt.meta_ac, 2391 &ref_credits); 2392 2393 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2394 ocfs2_remove_extent_credits(osb->sb)); 2395 if (IS_ERR(ctxt.handle)) { 2396 ret = PTR_ERR(ctxt.handle); 2397 mlog_errno(ret); 2398 break; 2399 } 2400 2401 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2402 2403 ocfs2_commit_trans(osb, ctxt.handle); 2404 if (ctxt.meta_ac) { 2405 ocfs2_free_alloc_context(ctxt.meta_ac); 2406 ctxt.meta_ac = NULL; 2407 } 2408 2409 if (ret < 0) { 2410 mlog_errno(ret); 2411 break; 2412 } 2413 2414 } 2415 2416 if (ctxt.meta_ac) 2417 ocfs2_free_alloc_context(ctxt.meta_ac); 2418 ocfs2_schedule_truncate_log_flush(osb, 1); 2419 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2420 return ret; 2421 } 2422 2423 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2424 struct buffer_head *di_bh, 2425 struct ocfs2_caching_info *ref_ci, 2426 struct buffer_head *ref_root_bh) 2427 { 2428 2429 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2430 struct ocfs2_xattr_header *header; 2431 int ret; 2432 struct ocfs2_xattr_value_buf vb = { 2433 .vb_bh = di_bh, 2434 .vb_access = ocfs2_journal_access_di, 2435 }; 2436 2437 header = (struct ocfs2_xattr_header *) 2438 ((void *)di + inode->i_sb->s_blocksize - 2439 le16_to_cpu(di->i_xattr_inline_size)); 2440 2441 ret = ocfs2_remove_value_outside(inode, &vb, header, 2442 ref_ci, ref_root_bh); 2443 2444 return ret; 2445 } 2446 2447 struct ocfs2_rm_xattr_bucket_para { 2448 struct ocfs2_caching_info *ref_ci; 2449 struct buffer_head *ref_root_bh; 2450 }; 2451 2452 static int ocfs2_xattr_block_remove(struct inode *inode, 2453 struct buffer_head *blk_bh, 2454 struct ocfs2_caching_info *ref_ci, 2455 struct buffer_head *ref_root_bh) 2456 { 2457 struct ocfs2_xattr_block *xb; 2458 int ret = 0; 2459 struct ocfs2_xattr_value_buf vb = { 2460 .vb_bh = blk_bh, 2461 .vb_access = ocfs2_journal_access_xb, 2462 }; 2463 struct ocfs2_rm_xattr_bucket_para args = { 2464 .ref_ci = ref_ci, 2465 .ref_root_bh = ref_root_bh, 2466 }; 2467 2468 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2469 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2470 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 2471 ret = ocfs2_remove_value_outside(inode, &vb, header, 2472 ref_ci, ref_root_bh); 2473 } else 2474 ret = ocfs2_iterate_xattr_index_block(inode, 2475 blk_bh, 2476 ocfs2_rm_xattr_cluster, 2477 &args); 2478 2479 return ret; 2480 } 2481 2482 static int ocfs2_xattr_free_block(struct inode *inode, 2483 u64 block, 2484 struct ocfs2_caching_info *ref_ci, 2485 struct buffer_head *ref_root_bh) 2486 { 2487 struct inode *xb_alloc_inode; 2488 struct buffer_head *xb_alloc_bh = NULL; 2489 struct buffer_head *blk_bh = NULL; 2490 struct ocfs2_xattr_block *xb; 2491 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2492 handle_t *handle; 2493 int ret = 0; 2494 u64 blk, bg_blkno; 2495 u16 bit; 2496 2497 ret = ocfs2_read_xattr_block(inode, block, &blk_bh); 2498 if (ret < 0) { 2499 mlog_errno(ret); 2500 goto out; 2501 } 2502 2503 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); 2504 if (ret < 0) { 2505 mlog_errno(ret); 2506 goto out; 2507 } 2508 2509 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2510 blk = le64_to_cpu(xb->xb_blkno); 2511 bit = le16_to_cpu(xb->xb_suballoc_bit); 2512 if (xb->xb_suballoc_loc) 2513 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); 2514 else 2515 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2516 2517 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2518 EXTENT_ALLOC_SYSTEM_INODE, 2519 le16_to_cpu(xb->xb_suballoc_slot)); 2520 if (!xb_alloc_inode) { 2521 ret = -ENOMEM; 2522 mlog_errno(ret); 2523 goto out; 2524 } 2525 inode_lock(xb_alloc_inode); 2526 2527 ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1); 2528 if (ret < 0) { 2529 mlog_errno(ret); 2530 goto out_mutex; 2531 } 2532 2533 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 2534 if (IS_ERR(handle)) { 2535 ret = PTR_ERR(handle); 2536 mlog_errno(ret); 2537 goto out_unlock; 2538 } 2539 2540 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, 2541 bit, bg_blkno, 1); 2542 if (ret < 0) 2543 mlog_errno(ret); 2544 2545 ocfs2_commit_trans(osb, handle); 2546 out_unlock: 2547 ocfs2_inode_unlock(xb_alloc_inode, 1); 2548 brelse(xb_alloc_bh); 2549 out_mutex: 2550 inode_unlock(xb_alloc_inode); 2551 iput(xb_alloc_inode); 2552 out: 2553 brelse(blk_bh); 2554 return ret; 2555 } 2556 2557 /* 2558 * ocfs2_xattr_remove() 2559 * 2560 * Free extended attribute resources associated with this inode. 2561 */ 2562 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) 2563 { 2564 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2565 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2566 struct ocfs2_refcount_tree *ref_tree = NULL; 2567 struct buffer_head *ref_root_bh = NULL; 2568 struct ocfs2_caching_info *ref_ci = NULL; 2569 handle_t *handle; 2570 int ret; 2571 2572 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2573 return 0; 2574 2575 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2576 return 0; 2577 2578 if (ocfs2_is_refcount_inode(inode)) { 2579 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), 2580 le64_to_cpu(di->i_refcount_loc), 2581 1, &ref_tree, &ref_root_bh); 2582 if (ret) { 2583 mlog_errno(ret); 2584 goto out; 2585 } 2586 ref_ci = &ref_tree->rf_ci; 2587 2588 } 2589 2590 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2591 ret = ocfs2_xattr_ibody_remove(inode, di_bh, 2592 ref_ci, ref_root_bh); 2593 if (ret < 0) { 2594 mlog_errno(ret); 2595 goto out; 2596 } 2597 } 2598 2599 if (di->i_xattr_loc) { 2600 ret = ocfs2_xattr_free_block(inode, 2601 le64_to_cpu(di->i_xattr_loc), 2602 ref_ci, ref_root_bh); 2603 if (ret < 0) { 2604 mlog_errno(ret); 2605 goto out; 2606 } 2607 } 2608 2609 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 2610 OCFS2_INODE_UPDATE_CREDITS); 2611 if (IS_ERR(handle)) { 2612 ret = PTR_ERR(handle); 2613 mlog_errno(ret); 2614 goto out; 2615 } 2616 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 2617 OCFS2_JOURNAL_ACCESS_WRITE); 2618 if (ret) { 2619 mlog_errno(ret); 2620 goto out_commit; 2621 } 2622 2623 di->i_xattr_loc = 0; 2624 2625 spin_lock(&oi->ip_lock); 2626 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2627 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2628 spin_unlock(&oi->ip_lock); 2629 ocfs2_update_inode_fsync_trans(handle, inode, 0); 2630 2631 ocfs2_journal_dirty(handle, di_bh); 2632 out_commit: 2633 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2634 out: 2635 if (ref_tree) 2636 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); 2637 brelse(ref_root_bh); 2638 return ret; 2639 } 2640 2641 static int ocfs2_xattr_has_space_inline(struct inode *inode, 2642 struct ocfs2_dinode *di) 2643 { 2644 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2645 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; 2646 int free; 2647 2648 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) 2649 return 0; 2650 2651 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2652 struct ocfs2_inline_data *idata = &di->id2.i_data; 2653 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); 2654 } else if (ocfs2_inode_is_fast_symlink(inode)) { 2655 free = ocfs2_fast_symlink_chars(inode->i_sb) - 2656 le64_to_cpu(di->i_size); 2657 } else { 2658 struct ocfs2_extent_list *el = &di->id2.i_list; 2659 free = (le16_to_cpu(el->l_count) - 2660 le16_to_cpu(el->l_next_free_rec)) * 2661 sizeof(struct ocfs2_extent_rec); 2662 } 2663 if (free >= xattrsize) 2664 return 1; 2665 2666 return 0; 2667 } 2668 2669 /* 2670 * ocfs2_xattr_ibody_find() 2671 * 2672 * Find extended attribute in inode block and 2673 * fill search info into struct ocfs2_xattr_search. 2674 */ 2675 static int ocfs2_xattr_ibody_find(struct inode *inode, 2676 int name_index, 2677 const char *name, 2678 struct ocfs2_xattr_search *xs) 2679 { 2680 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2681 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2682 int ret; 2683 int has_space = 0; 2684 2685 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2686 return 0; 2687 2688 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2689 down_read(&oi->ip_alloc_sem); 2690 has_space = ocfs2_xattr_has_space_inline(inode, di); 2691 up_read(&oi->ip_alloc_sem); 2692 if (!has_space) 2693 return 0; 2694 } 2695 2696 xs->xattr_bh = xs->inode_bh; 2697 xs->end = (void *)di + inode->i_sb->s_blocksize; 2698 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) 2699 xs->header = (struct ocfs2_xattr_header *) 2700 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 2701 else 2702 xs->header = (struct ocfs2_xattr_header *) 2703 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); 2704 xs->base = (void *)xs->header; 2705 xs->here = xs->header->xh_entries; 2706 2707 /* Find the named attribute. */ 2708 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2709 ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); 2710 if (ret && ret != -ENODATA) 2711 return ret; 2712 xs->not_found = ret; 2713 } 2714 2715 return 0; 2716 } 2717 2718 static int ocfs2_xattr_ibody_init(struct inode *inode, 2719 struct buffer_head *di_bh, 2720 struct ocfs2_xattr_set_ctxt *ctxt) 2721 { 2722 int ret; 2723 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2724 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2725 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2726 unsigned int xattrsize = osb->s_xattr_inline_size; 2727 2728 if (!ocfs2_xattr_has_space_inline(inode, di)) { 2729 ret = -ENOSPC; 2730 goto out; 2731 } 2732 2733 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, 2734 OCFS2_JOURNAL_ACCESS_WRITE); 2735 if (ret) { 2736 mlog_errno(ret); 2737 goto out; 2738 } 2739 2740 /* 2741 * Adjust extent record count or inline data size 2742 * to reserve space for extended attribute. 2743 */ 2744 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2745 struct ocfs2_inline_data *idata = &di->id2.i_data; 2746 le16_add_cpu(&idata->id_count, -xattrsize); 2747 } else if (!(ocfs2_inode_is_fast_symlink(inode))) { 2748 struct ocfs2_extent_list *el = &di->id2.i_list; 2749 le16_add_cpu(&el->l_count, -(xattrsize / 2750 sizeof(struct ocfs2_extent_rec))); 2751 } 2752 di->i_xattr_inline_size = cpu_to_le16(xattrsize); 2753 2754 spin_lock(&oi->ip_lock); 2755 oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; 2756 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2757 spin_unlock(&oi->ip_lock); 2758 2759 ocfs2_journal_dirty(ctxt->handle, di_bh); 2760 2761 out: 2762 return ret; 2763 } 2764 2765 /* 2766 * ocfs2_xattr_ibody_set() 2767 * 2768 * Set, replace or remove an extended attribute into inode block. 2769 * 2770 */ 2771 static int ocfs2_xattr_ibody_set(struct inode *inode, 2772 struct ocfs2_xattr_info *xi, 2773 struct ocfs2_xattr_search *xs, 2774 struct ocfs2_xattr_set_ctxt *ctxt) 2775 { 2776 int ret; 2777 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2778 struct ocfs2_xa_loc loc; 2779 2780 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2781 return -ENOSPC; 2782 2783 down_write(&oi->ip_alloc_sem); 2784 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2785 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2786 if (ret) { 2787 if (ret != -ENOSPC) 2788 mlog_errno(ret); 2789 goto out; 2790 } 2791 } 2792 2793 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2794 xs->not_found ? NULL : xs->here); 2795 ret = ocfs2_xa_set(&loc, xi, ctxt); 2796 if (ret) { 2797 if (ret != -ENOSPC) 2798 mlog_errno(ret); 2799 goto out; 2800 } 2801 xs->here = loc.xl_entry; 2802 2803 out: 2804 up_write(&oi->ip_alloc_sem); 2805 2806 return ret; 2807 } 2808 2809 /* 2810 * ocfs2_xattr_block_find() 2811 * 2812 * Find extended attribute in external block and 2813 * fill search info into struct ocfs2_xattr_search. 2814 */ 2815 static int ocfs2_xattr_block_find(struct inode *inode, 2816 int name_index, 2817 const char *name, 2818 struct ocfs2_xattr_search *xs) 2819 { 2820 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2821 struct buffer_head *blk_bh = NULL; 2822 struct ocfs2_xattr_block *xb; 2823 int ret = 0; 2824 2825 if (!di->i_xattr_loc) 2826 return ret; 2827 2828 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 2829 &blk_bh); 2830 if (ret < 0) { 2831 mlog_errno(ret); 2832 return ret; 2833 } 2834 2835 xs->xattr_bh = blk_bh; 2836 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2837 2838 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2839 xs->header = &xb->xb_attrs.xb_header; 2840 xs->base = (void *)xs->header; 2841 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; 2842 xs->here = xs->header->xh_entries; 2843 2844 ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); 2845 } else 2846 ret = ocfs2_xattr_index_block_find(inode, blk_bh, 2847 name_index, 2848 name, xs); 2849 2850 if (ret && ret != -ENODATA) { 2851 xs->xattr_bh = NULL; 2852 goto cleanup; 2853 } 2854 xs->not_found = ret; 2855 return 0; 2856 cleanup: 2857 brelse(blk_bh); 2858 2859 return ret; 2860 } 2861 2862 static int ocfs2_create_xattr_block(struct inode *inode, 2863 struct buffer_head *inode_bh, 2864 struct ocfs2_xattr_set_ctxt *ctxt, 2865 int indexed, 2866 struct buffer_head **ret_bh) 2867 { 2868 int ret; 2869 u16 suballoc_bit_start; 2870 u32 num_got; 2871 u64 suballoc_loc, first_blkno; 2872 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; 2873 struct buffer_head *new_bh = NULL; 2874 struct ocfs2_xattr_block *xblk; 2875 2876 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 2877 inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); 2878 if (ret < 0) { 2879 mlog_errno(ret); 2880 goto end; 2881 } 2882 2883 ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1, 2884 &suballoc_loc, &suballoc_bit_start, 2885 &num_got, &first_blkno); 2886 if (ret < 0) { 2887 mlog_errno(ret); 2888 goto end; 2889 } 2890 2891 new_bh = sb_getblk(inode->i_sb, first_blkno); 2892 if (!new_bh) { 2893 ret = -ENOMEM; 2894 mlog_errno(ret); 2895 goto end; 2896 } 2897 2898 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); 2899 2900 ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), 2901 new_bh, 2902 OCFS2_JOURNAL_ACCESS_CREATE); 2903 if (ret < 0) { 2904 mlog_errno(ret); 2905 goto end; 2906 } 2907 2908 /* Initialize ocfs2_xattr_block */ 2909 xblk = (struct ocfs2_xattr_block *)new_bh->b_data; 2910 memset(xblk, 0, inode->i_sb->s_blocksize); 2911 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2912 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2913 xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc); 2914 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2915 xblk->xb_fs_generation = 2916 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation); 2917 xblk->xb_blkno = cpu_to_le64(first_blkno); 2918 if (indexed) { 2919 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2920 xr->xt_clusters = cpu_to_le32(1); 2921 xr->xt_last_eb_blk = 0; 2922 xr->xt_list.l_tree_depth = 0; 2923 xr->xt_list.l_count = cpu_to_le16( 2924 ocfs2_xattr_recs_per_xb(inode->i_sb)); 2925 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 2926 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); 2927 } 2928 ocfs2_journal_dirty(ctxt->handle, new_bh); 2929 2930 /* Add it to the inode */ 2931 di->i_xattr_loc = cpu_to_le64(first_blkno); 2932 2933 spin_lock(&OCFS2_I(inode)->ip_lock); 2934 OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 2935 di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); 2936 spin_unlock(&OCFS2_I(inode)->ip_lock); 2937 2938 ocfs2_journal_dirty(ctxt->handle, inode_bh); 2939 2940 *ret_bh = new_bh; 2941 new_bh = NULL; 2942 2943 end: 2944 brelse(new_bh); 2945 return ret; 2946 } 2947 2948 /* 2949 * ocfs2_xattr_block_set() 2950 * 2951 * Set, replace or remove an extended attribute into external block. 2952 * 2953 */ 2954 static int ocfs2_xattr_block_set(struct inode *inode, 2955 struct ocfs2_xattr_info *xi, 2956 struct ocfs2_xattr_search *xs, 2957 struct ocfs2_xattr_set_ctxt *ctxt) 2958 { 2959 struct buffer_head *new_bh = NULL; 2960 struct ocfs2_xattr_block *xblk = NULL; 2961 int ret; 2962 struct ocfs2_xa_loc loc; 2963 2964 if (!xs->xattr_bh) { 2965 ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt, 2966 0, &new_bh); 2967 if (ret) { 2968 mlog_errno(ret); 2969 goto end; 2970 } 2971 2972 xs->xattr_bh = new_bh; 2973 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2974 xs->header = &xblk->xb_attrs.xb_header; 2975 xs->base = (void *)xs->header; 2976 xs->end = (void *)xblk + inode->i_sb->s_blocksize; 2977 xs->here = xs->header->xh_entries; 2978 } else 2979 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2980 2981 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { 2982 ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh, 2983 xs->not_found ? NULL : xs->here); 2984 2985 ret = ocfs2_xa_set(&loc, xi, ctxt); 2986 if (!ret) 2987 xs->here = loc.xl_entry; 2988 else if ((ret != -ENOSPC) || ctxt->set_abort) 2989 goto end; 2990 else { 2991 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); 2992 if (ret) 2993 goto end; 2994 } 2995 } 2996 2997 if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED) 2998 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt); 2999 3000 end: 3001 return ret; 3002 } 3003 3004 /* Check whether the new xattr can be inserted into the inode. */ 3005 static int ocfs2_xattr_can_be_in_inode(struct inode *inode, 3006 struct ocfs2_xattr_info *xi, 3007 struct ocfs2_xattr_search *xs) 3008 { 3009 struct ocfs2_xattr_entry *last; 3010 int free, i; 3011 size_t min_offs = xs->end - xs->base; 3012 3013 if (!xs->header) 3014 return 0; 3015 3016 last = xs->header->xh_entries; 3017 3018 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 3019 size_t offs = le16_to_cpu(last->xe_name_offset); 3020 if (offs < min_offs) 3021 min_offs = offs; 3022 last += 1; 3023 } 3024 3025 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; 3026 if (free < 0) 3027 return 0; 3028 3029 BUG_ON(!xs->not_found); 3030 3031 if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi))) 3032 return 1; 3033 3034 return 0; 3035 } 3036 3037 static int ocfs2_calc_xattr_set_need(struct inode *inode, 3038 struct ocfs2_dinode *di, 3039 struct ocfs2_xattr_info *xi, 3040 struct ocfs2_xattr_search *xis, 3041 struct ocfs2_xattr_search *xbs, 3042 int *clusters_need, 3043 int *meta_need, 3044 int *credits_need) 3045 { 3046 int ret = 0, old_in_xb = 0; 3047 int clusters_add = 0, meta_add = 0, credits = 0; 3048 struct buffer_head *bh = NULL; 3049 struct ocfs2_xattr_block *xb = NULL; 3050 struct ocfs2_xattr_entry *xe = NULL; 3051 struct ocfs2_xattr_value_root *xv = NULL; 3052 char *base = NULL; 3053 int name_offset, name_len = 0; 3054 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3055 xi->xi_value_len); 3056 u64 value_size; 3057 3058 /* 3059 * Calculate the clusters we need to write. 3060 * No matter whether we replace an old one or add a new one, 3061 * we need this for writing. 3062 */ 3063 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 3064 credits += new_clusters * 3065 ocfs2_clusters_to_blocks(inode->i_sb, 1); 3066 3067 if (xis->not_found && xbs->not_found) { 3068 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3069 3070 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3071 clusters_add += new_clusters; 3072 credits += ocfs2_calc_extend_credits(inode->i_sb, 3073 &def_xv.xv.xr_list); 3074 } 3075 3076 goto meta_guess; 3077 } 3078 3079 if (!xis->not_found) { 3080 xe = xis->here; 3081 name_offset = le16_to_cpu(xe->xe_name_offset); 3082 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3083 base = xis->base; 3084 credits += OCFS2_INODE_UPDATE_CREDITS; 3085 } else { 3086 int i, block_off = 0; 3087 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3088 xe = xbs->here; 3089 name_offset = le16_to_cpu(xe->xe_name_offset); 3090 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3091 i = xbs->here - xbs->header->xh_entries; 3092 old_in_xb = 1; 3093 3094 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3095 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3096 bucket_xh(xbs->bucket), 3097 i, &block_off, 3098 &name_offset); 3099 base = bucket_block(xbs->bucket, block_off); 3100 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3101 } else { 3102 base = xbs->base; 3103 credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS; 3104 } 3105 } 3106 3107 /* 3108 * delete a xattr doesn't need metadata and cluster allocation. 3109 * so just calculate the credits and return. 3110 * 3111 * The credits for removing the value tree will be extended 3112 * by ocfs2_remove_extent itself. 3113 */ 3114 if (!xi->xi_value) { 3115 if (!ocfs2_xattr_is_local(xe)) 3116 credits += ocfs2_remove_extent_credits(inode->i_sb); 3117 3118 goto out; 3119 } 3120 3121 /* do cluster allocation guess first. */ 3122 value_size = le64_to_cpu(xe->xe_value_size); 3123 3124 if (old_in_xb) { 3125 /* 3126 * In xattr set, we always try to set the xe in inode first, 3127 * so if it can be inserted into inode successfully, the old 3128 * one will be removed from the xattr block, and this xattr 3129 * will be inserted into inode as a new xattr in inode. 3130 */ 3131 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { 3132 clusters_add += new_clusters; 3133 credits += ocfs2_remove_extent_credits(inode->i_sb) + 3134 OCFS2_INODE_UPDATE_CREDITS; 3135 if (!ocfs2_xattr_is_local(xe)) 3136 credits += ocfs2_calc_extend_credits( 3137 inode->i_sb, 3138 &def_xv.xv.xr_list); 3139 goto out; 3140 } 3141 } 3142 3143 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3144 /* the new values will be stored outside. */ 3145 u32 old_clusters = 0; 3146 3147 if (!ocfs2_xattr_is_local(xe)) { 3148 old_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3149 value_size); 3150 xv = (struct ocfs2_xattr_value_root *) 3151 (base + name_offset + name_len); 3152 value_size = OCFS2_XATTR_ROOT_SIZE; 3153 } else 3154 xv = &def_xv.xv; 3155 3156 if (old_clusters >= new_clusters) { 3157 credits += ocfs2_remove_extent_credits(inode->i_sb); 3158 goto out; 3159 } else { 3160 meta_add += ocfs2_extend_meta_needed(&xv->xr_list); 3161 clusters_add += new_clusters - old_clusters; 3162 credits += ocfs2_calc_extend_credits(inode->i_sb, 3163 &xv->xr_list); 3164 if (value_size >= OCFS2_XATTR_ROOT_SIZE) 3165 goto out; 3166 } 3167 } else { 3168 /* 3169 * Now the new value will be stored inside. So if the new 3170 * value is smaller than the size of value root or the old 3171 * value, we don't need any allocation, otherwise we have 3172 * to guess metadata allocation. 3173 */ 3174 if ((ocfs2_xattr_is_local(xe) && 3175 (value_size >= xi->xi_value_len)) || 3176 (!ocfs2_xattr_is_local(xe) && 3177 OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len)) 3178 goto out; 3179 } 3180 3181 meta_guess: 3182 /* calculate metadata allocation. */ 3183 if (di->i_xattr_loc) { 3184 if (!xbs->xattr_bh) { 3185 ret = ocfs2_read_xattr_block(inode, 3186 le64_to_cpu(di->i_xattr_loc), 3187 &bh); 3188 if (ret) { 3189 mlog_errno(ret); 3190 goto out; 3191 } 3192 3193 xb = (struct ocfs2_xattr_block *)bh->b_data; 3194 } else 3195 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3196 3197 /* 3198 * If there is already an xattr tree, good, we can calculate 3199 * like other b-trees. Otherwise we may have the chance of 3200 * create a tree, the credit calculation is borrowed from 3201 * ocfs2_calc_extend_credits with root_el = NULL. And the 3202 * new tree will be cluster based, so no meta is needed. 3203 */ 3204 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3205 struct ocfs2_extent_list *el = 3206 &xb->xb_attrs.xb_root.xt_list; 3207 meta_add += ocfs2_extend_meta_needed(el); 3208 credits += ocfs2_calc_extend_credits(inode->i_sb, 3209 el); 3210 } else 3211 credits += OCFS2_SUBALLOC_ALLOC + 1; 3212 3213 /* 3214 * This cluster will be used either for new bucket or for 3215 * new xattr block. 3216 * If the cluster size is the same as the bucket size, one 3217 * more is needed since we may need to extend the bucket 3218 * also. 3219 */ 3220 clusters_add += 1; 3221 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3222 if (OCFS2_XATTR_BUCKET_SIZE == 3223 OCFS2_SB(inode->i_sb)->s_clustersize) { 3224 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3225 clusters_add += 1; 3226 } 3227 } else { 3228 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 3229 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3230 struct ocfs2_extent_list *el = &def_xv.xv.xr_list; 3231 meta_add += ocfs2_extend_meta_needed(el); 3232 credits += ocfs2_calc_extend_credits(inode->i_sb, 3233 el); 3234 } else { 3235 meta_add += 1; 3236 } 3237 } 3238 out: 3239 if (clusters_need) 3240 *clusters_need = clusters_add; 3241 if (meta_need) 3242 *meta_need = meta_add; 3243 if (credits_need) 3244 *credits_need = credits; 3245 brelse(bh); 3246 return ret; 3247 } 3248 3249 static int ocfs2_init_xattr_set_ctxt(struct inode *inode, 3250 struct ocfs2_dinode *di, 3251 struct ocfs2_xattr_info *xi, 3252 struct ocfs2_xattr_search *xis, 3253 struct ocfs2_xattr_search *xbs, 3254 struct ocfs2_xattr_set_ctxt *ctxt, 3255 int extra_meta, 3256 int *credits) 3257 { 3258 int clusters_add, meta_add, ret; 3259 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3260 3261 memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt)); 3262 3263 ocfs2_init_dealloc_ctxt(&ctxt->dealloc); 3264 3265 ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, 3266 &clusters_add, &meta_add, credits); 3267 if (ret) { 3268 mlog_errno(ret); 3269 return ret; 3270 } 3271 3272 meta_add += extra_meta; 3273 trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add, 3274 clusters_add, *credits); 3275 3276 if (meta_add) { 3277 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, 3278 &ctxt->meta_ac); 3279 if (ret) { 3280 mlog_errno(ret); 3281 goto out; 3282 } 3283 } 3284 3285 if (clusters_add) { 3286 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac); 3287 if (ret) 3288 mlog_errno(ret); 3289 } 3290 out: 3291 if (ret) { 3292 if (ctxt->meta_ac) { 3293 ocfs2_free_alloc_context(ctxt->meta_ac); 3294 ctxt->meta_ac = NULL; 3295 } 3296 3297 /* 3298 * We cannot have an error and a non null ctxt->data_ac. 3299 */ 3300 } 3301 3302 return ret; 3303 } 3304 3305 static int __ocfs2_xattr_set_handle(struct inode *inode, 3306 struct ocfs2_dinode *di, 3307 struct ocfs2_xattr_info *xi, 3308 struct ocfs2_xattr_search *xis, 3309 struct ocfs2_xattr_search *xbs, 3310 struct ocfs2_xattr_set_ctxt *ctxt) 3311 { 3312 int ret = 0, credits, old_found; 3313 3314 if (!xi->xi_value) { 3315 /* Remove existing extended attribute */ 3316 if (!xis->not_found) 3317 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3318 else if (!xbs->not_found) 3319 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3320 } else { 3321 /* We always try to set extended attribute into inode first*/ 3322 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3323 if (!ret && !xbs->not_found) { 3324 /* 3325 * If succeed and that extended attribute existing in 3326 * external block, then we will remove it. 3327 */ 3328 xi->xi_value = NULL; 3329 xi->xi_value_len = 0; 3330 3331 old_found = xis->not_found; 3332 xis->not_found = -ENODATA; 3333 ret = ocfs2_calc_xattr_set_need(inode, 3334 di, 3335 xi, 3336 xis, 3337 xbs, 3338 NULL, 3339 NULL, 3340 &credits); 3341 xis->not_found = old_found; 3342 if (ret) { 3343 mlog_errno(ret); 3344 goto out; 3345 } 3346 3347 ret = ocfs2_extend_trans(ctxt->handle, credits); 3348 if (ret) { 3349 mlog_errno(ret); 3350 goto out; 3351 } 3352 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3353 } else if ((ret == -ENOSPC) && !ctxt->set_abort) { 3354 if (di->i_xattr_loc && !xbs->xattr_bh) { 3355 ret = ocfs2_xattr_block_find(inode, 3356 xi->xi_name_index, 3357 xi->xi_name, xbs); 3358 if (ret) 3359 goto out; 3360 3361 old_found = xis->not_found; 3362 xis->not_found = -ENODATA; 3363 ret = ocfs2_calc_xattr_set_need(inode, 3364 di, 3365 xi, 3366 xis, 3367 xbs, 3368 NULL, 3369 NULL, 3370 &credits); 3371 xis->not_found = old_found; 3372 if (ret) { 3373 mlog_errno(ret); 3374 goto out; 3375 } 3376 3377 ret = ocfs2_extend_trans(ctxt->handle, credits); 3378 if (ret) { 3379 mlog_errno(ret); 3380 goto out; 3381 } 3382 } 3383 /* 3384 * If no space in inode, we will set extended attribute 3385 * into external block. 3386 */ 3387 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3388 if (ret) 3389 goto out; 3390 if (!xis->not_found) { 3391 /* 3392 * If succeed and that extended attribute 3393 * existing in inode, we will remove it. 3394 */ 3395 xi->xi_value = NULL; 3396 xi->xi_value_len = 0; 3397 xbs->not_found = -ENODATA; 3398 ret = ocfs2_calc_xattr_set_need(inode, 3399 di, 3400 xi, 3401 xis, 3402 xbs, 3403 NULL, 3404 NULL, 3405 &credits); 3406 if (ret) { 3407 mlog_errno(ret); 3408 goto out; 3409 } 3410 3411 ret = ocfs2_extend_trans(ctxt->handle, credits); 3412 if (ret) { 3413 mlog_errno(ret); 3414 goto out; 3415 } 3416 ret = ocfs2_xattr_ibody_set(inode, xi, 3417 xis, ctxt); 3418 } 3419 } 3420 } 3421 3422 if (!ret) { 3423 /* Update inode ctime. */ 3424 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 3425 xis->inode_bh, 3426 OCFS2_JOURNAL_ACCESS_WRITE); 3427 if (ret) { 3428 mlog_errno(ret); 3429 goto out; 3430 } 3431 3432 inode_set_ctime_current(inode); 3433 di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode)); 3434 di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); 3435 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh); 3436 } 3437 out: 3438 return ret; 3439 } 3440 3441 /* 3442 * This function only called duing creating inode 3443 * for init security/acl xattrs of the new inode. 3444 * All transanction credits have been reserved in mknod. 3445 */ 3446 int ocfs2_xattr_set_handle(handle_t *handle, 3447 struct inode *inode, 3448 struct buffer_head *di_bh, 3449 int name_index, 3450 const char *name, 3451 const void *value, 3452 size_t value_len, 3453 int flags, 3454 struct ocfs2_alloc_context *meta_ac, 3455 struct ocfs2_alloc_context *data_ac) 3456 { 3457 struct ocfs2_dinode *di; 3458 int ret; 3459 3460 struct ocfs2_xattr_info xi = { 3461 .xi_name_index = name_index, 3462 .xi_name = name, 3463 .xi_name_len = strlen(name), 3464 .xi_value = value, 3465 .xi_value_len = value_len, 3466 }; 3467 3468 struct ocfs2_xattr_search xis = { 3469 .not_found = -ENODATA, 3470 }; 3471 3472 struct ocfs2_xattr_search xbs = { 3473 .not_found = -ENODATA, 3474 }; 3475 3476 struct ocfs2_xattr_set_ctxt ctxt = { 3477 .handle = handle, 3478 .meta_ac = meta_ac, 3479 .data_ac = data_ac, 3480 }; 3481 3482 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3483 return -EOPNOTSUPP; 3484 3485 /* 3486 * In extreme situation, may need xattr bucket when 3487 * block size is too small. And we have already reserved 3488 * the credits for bucket in mknod. 3489 */ 3490 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) { 3491 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3492 if (!xbs.bucket) { 3493 mlog_errno(-ENOMEM); 3494 return -ENOMEM; 3495 } 3496 } 3497 3498 xis.inode_bh = xbs.inode_bh = di_bh; 3499 di = (struct ocfs2_dinode *)di_bh->b_data; 3500 3501 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3502 3503 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3504 if (ret) 3505 goto cleanup; 3506 if (xis.not_found) { 3507 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3508 if (ret) 3509 goto cleanup; 3510 } 3511 3512 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3513 3514 cleanup: 3515 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3516 brelse(xbs.xattr_bh); 3517 ocfs2_xattr_bucket_free(xbs.bucket); 3518 3519 return ret; 3520 } 3521 3522 /* 3523 * ocfs2_xattr_set() 3524 * 3525 * Set, replace or remove an extended attribute for this inode. 3526 * value is NULL to remove an existing extended attribute, else either 3527 * create or replace an extended attribute. 3528 */ 3529 int ocfs2_xattr_set(struct inode *inode, 3530 int name_index, 3531 const char *name, 3532 const void *value, 3533 size_t value_len, 3534 int flags) 3535 { 3536 struct buffer_head *di_bh = NULL; 3537 struct ocfs2_dinode *di; 3538 int ret, credits, had_lock, ref_meta = 0, ref_credits = 0; 3539 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3540 struct inode *tl_inode = osb->osb_tl_inode; 3541 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; 3542 struct ocfs2_refcount_tree *ref_tree = NULL; 3543 struct ocfs2_lock_holder oh; 3544 3545 struct ocfs2_xattr_info xi = { 3546 .xi_name_index = name_index, 3547 .xi_name = name, 3548 .xi_name_len = strlen(name), 3549 .xi_value = value, 3550 .xi_value_len = value_len, 3551 }; 3552 3553 struct ocfs2_xattr_search xis = { 3554 .not_found = -ENODATA, 3555 }; 3556 3557 struct ocfs2_xattr_search xbs = { 3558 .not_found = -ENODATA, 3559 }; 3560 3561 if (!ocfs2_supports_xattr(osb)) 3562 return -EOPNOTSUPP; 3563 3564 /* 3565 * Only xbs will be used on indexed trees. xis doesn't need a 3566 * bucket. 3567 */ 3568 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3569 if (!xbs.bucket) { 3570 mlog_errno(-ENOMEM); 3571 return -ENOMEM; 3572 } 3573 3574 had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh); 3575 if (had_lock < 0) { 3576 ret = had_lock; 3577 mlog_errno(ret); 3578 goto cleanup_nolock; 3579 } 3580 xis.inode_bh = xbs.inode_bh = di_bh; 3581 di = (struct ocfs2_dinode *)di_bh->b_data; 3582 3583 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3584 /* 3585 * Scan inode and external block to find the same name 3586 * extended attribute and collect search information. 3587 */ 3588 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3589 if (ret) 3590 goto cleanup; 3591 if (xis.not_found) { 3592 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3593 if (ret) 3594 goto cleanup; 3595 } 3596 3597 if (xis.not_found && xbs.not_found) { 3598 ret = -ENODATA; 3599 if (flags & XATTR_REPLACE) 3600 goto cleanup; 3601 ret = 0; 3602 if (!value) 3603 goto cleanup; 3604 } else { 3605 ret = -EEXIST; 3606 if (flags & XATTR_CREATE) 3607 goto cleanup; 3608 } 3609 3610 /* Check whether the value is refcounted and do some preparation. */ 3611 if (ocfs2_is_refcount_inode(inode) && 3612 (!xis.not_found || !xbs.not_found)) { 3613 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3614 &xis, &xbs, &ref_tree, 3615 &ref_meta, &ref_credits); 3616 if (ret) { 3617 mlog_errno(ret); 3618 goto cleanup; 3619 } 3620 } 3621 3622 inode_lock(tl_inode); 3623 3624 if (ocfs2_truncate_log_needs_flush(osb)) { 3625 ret = __ocfs2_flush_truncate_log(osb); 3626 if (ret < 0) { 3627 inode_unlock(tl_inode); 3628 mlog_errno(ret); 3629 goto cleanup; 3630 } 3631 } 3632 inode_unlock(tl_inode); 3633 3634 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3635 &xbs, &ctxt, ref_meta, &credits); 3636 if (ret) { 3637 mlog_errno(ret); 3638 goto cleanup; 3639 } 3640 3641 /* we need to update inode's ctime field, so add credit for it. */ 3642 credits += OCFS2_INODE_UPDATE_CREDITS; 3643 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 3644 if (IS_ERR(ctxt.handle)) { 3645 ret = PTR_ERR(ctxt.handle); 3646 mlog_errno(ret); 3647 goto out_free_ac; 3648 } 3649 3650 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3651 ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0); 3652 3653 ocfs2_commit_trans(osb, ctxt.handle); 3654 3655 out_free_ac: 3656 if (ctxt.data_ac) 3657 ocfs2_free_alloc_context(ctxt.data_ac); 3658 if (ctxt.meta_ac) 3659 ocfs2_free_alloc_context(ctxt.meta_ac); 3660 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3661 ocfs2_schedule_truncate_log_flush(osb, 1); 3662 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3663 3664 cleanup: 3665 if (ref_tree) 3666 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3667 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3668 if (!value && !ret) { 3669 ret = ocfs2_try_remove_refcount_tree(inode, di_bh); 3670 if (ret) 3671 mlog_errno(ret); 3672 } 3673 ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); 3674 cleanup_nolock: 3675 brelse(di_bh); 3676 brelse(xbs.xattr_bh); 3677 ocfs2_xattr_bucket_free(xbs.bucket); 3678 3679 return ret; 3680 } 3681 3682 /* 3683 * Find the xattr extent rec which may contains name_hash. 3684 * e_cpos will be the first name hash of the xattr rec. 3685 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. 3686 */ 3687 static int ocfs2_xattr_get_rec(struct inode *inode, 3688 u32 name_hash, 3689 u64 *p_blkno, 3690 u32 *e_cpos, 3691 u32 *num_clusters, 3692 struct ocfs2_extent_list *el) 3693 { 3694 int ret = 0, i; 3695 struct buffer_head *eb_bh = NULL; 3696 struct ocfs2_extent_block *eb; 3697 struct ocfs2_extent_rec *rec = NULL; 3698 u64 e_blkno = 0; 3699 3700 if (el->l_tree_depth) { 3701 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3702 &eb_bh); 3703 if (ret) { 3704 mlog_errno(ret); 3705 goto out; 3706 } 3707 3708 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3709 el = &eb->h_list; 3710 3711 if (el->l_tree_depth) { 3712 ret = ocfs2_error(inode->i_sb, 3713 "Inode %lu has non zero tree depth in xattr tree block %llu\n", 3714 inode->i_ino, 3715 (unsigned long long)eb_bh->b_blocknr); 3716 goto out; 3717 } 3718 } 3719 3720 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3721 rec = &el->l_recs[i]; 3722 3723 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3724 e_blkno = le64_to_cpu(rec->e_blkno); 3725 break; 3726 } 3727 } 3728 3729 if (!e_blkno) { 3730 ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n", 3731 inode->i_ino, 3732 le32_to_cpu(rec->e_cpos), 3733 ocfs2_rec_clusters(el, rec)); 3734 goto out; 3735 } 3736 3737 *p_blkno = le64_to_cpu(rec->e_blkno); 3738 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3739 if (e_cpos) 3740 *e_cpos = le32_to_cpu(rec->e_cpos); 3741 out: 3742 brelse(eb_bh); 3743 return ret; 3744 } 3745 3746 typedef int (xattr_bucket_func)(struct inode *inode, 3747 struct ocfs2_xattr_bucket *bucket, 3748 void *para); 3749 3750 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3751 struct ocfs2_xattr_bucket *bucket, 3752 int name_index, 3753 const char *name, 3754 u32 name_hash, 3755 u16 *xe_index, 3756 int *found) 3757 { 3758 int i, ret = 0, cmp = 1, block_off, new_offset; 3759 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3760 size_t name_len = strlen(name); 3761 struct ocfs2_xattr_entry *xe = NULL; 3762 char *xe_name; 3763 3764 /* 3765 * We don't use binary search in the bucket because there 3766 * may be multiple entries with the same name hash. 3767 */ 3768 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3769 xe = &xh->xh_entries[i]; 3770 3771 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3772 continue; 3773 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3774 break; 3775 3776 cmp = name_index - ocfs2_xattr_get_type(xe); 3777 if (!cmp) 3778 cmp = name_len - xe->xe_name_len; 3779 if (cmp) 3780 continue; 3781 3782 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3783 xh, 3784 i, 3785 &block_off, 3786 &new_offset); 3787 if (ret) { 3788 mlog_errno(ret); 3789 break; 3790 } 3791 3792 3793 xe_name = bucket_block(bucket, block_off) + new_offset; 3794 if (!memcmp(name, xe_name, name_len)) { 3795 *xe_index = i; 3796 *found = 1; 3797 ret = 0; 3798 break; 3799 } 3800 } 3801 3802 return ret; 3803 } 3804 3805 /* 3806 * Find the specified xattr entry in a series of buckets. 3807 * This series start from p_blkno and last for num_clusters. 3808 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3809 * the num of the valid buckets. 3810 * 3811 * Return the buffer_head this xattr should reside in. And if the xattr's 3812 * hash is in the gap of 2 buckets, return the lower bucket. 3813 */ 3814 static int ocfs2_xattr_bucket_find(struct inode *inode, 3815 int name_index, 3816 const char *name, 3817 u32 name_hash, 3818 u64 p_blkno, 3819 u32 first_hash, 3820 u32 num_clusters, 3821 struct ocfs2_xattr_search *xs) 3822 { 3823 int ret, found = 0; 3824 struct ocfs2_xattr_header *xh = NULL; 3825 struct ocfs2_xattr_entry *xe = NULL; 3826 u16 index = 0; 3827 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3828 int low_bucket = 0, bucket, high_bucket; 3829 struct ocfs2_xattr_bucket *search; 3830 u64 blkno, lower_blkno = 0; 3831 3832 search = ocfs2_xattr_bucket_new(inode); 3833 if (!search) { 3834 ret = -ENOMEM; 3835 mlog_errno(ret); 3836 goto out; 3837 } 3838 3839 ret = ocfs2_read_xattr_bucket(search, p_blkno); 3840 if (ret) { 3841 mlog_errno(ret); 3842 goto out; 3843 } 3844 3845 xh = bucket_xh(search); 3846 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 3847 while (low_bucket <= high_bucket) { 3848 ocfs2_xattr_bucket_relse(search); 3849 3850 bucket = (low_bucket + high_bucket) / 2; 3851 blkno = p_blkno + bucket * blk_per_bucket; 3852 ret = ocfs2_read_xattr_bucket(search, blkno); 3853 if (ret) { 3854 mlog_errno(ret); 3855 goto out; 3856 } 3857 3858 xh = bucket_xh(search); 3859 xe = &xh->xh_entries[0]; 3860 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3861 high_bucket = bucket - 1; 3862 continue; 3863 } 3864 3865 /* 3866 * Check whether the hash of the last entry in our 3867 * bucket is larger than the search one. for an empty 3868 * bucket, the last one is also the first one. 3869 */ 3870 if (xh->xh_count) 3871 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; 3872 3873 /* record lower_blkno which may be the insert place. */ 3874 lower_blkno = blkno; 3875 3876 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3877 low_bucket = bucket + 1; 3878 continue; 3879 } 3880 3881 /* the searched xattr should reside in this bucket if exists. */ 3882 ret = ocfs2_find_xe_in_bucket(inode, search, 3883 name_index, name, name_hash, 3884 &index, &found); 3885 if (ret) { 3886 mlog_errno(ret); 3887 goto out; 3888 } 3889 break; 3890 } 3891 3892 /* 3893 * Record the bucket we have found. 3894 * When the xattr's hash value is in the gap of 2 buckets, we will 3895 * always set it to the previous bucket. 3896 */ 3897 if (!lower_blkno) 3898 lower_blkno = p_blkno; 3899 3900 /* This should be in cache - we just read it during the search */ 3901 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); 3902 if (ret) { 3903 mlog_errno(ret); 3904 goto out; 3905 } 3906 3907 xs->header = bucket_xh(xs->bucket); 3908 xs->base = bucket_block(xs->bucket, 0); 3909 xs->end = xs->base + inode->i_sb->s_blocksize; 3910 3911 if (found) { 3912 xs->here = &xs->header->xh_entries[index]; 3913 trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno, 3914 name, name_index, name_hash, 3915 (unsigned long long)bucket_blkno(xs->bucket), 3916 index); 3917 } else 3918 ret = -ENODATA; 3919 3920 out: 3921 ocfs2_xattr_bucket_free(search); 3922 return ret; 3923 } 3924 3925 static int ocfs2_xattr_index_block_find(struct inode *inode, 3926 struct buffer_head *root_bh, 3927 int name_index, 3928 const char *name, 3929 struct ocfs2_xattr_search *xs) 3930 { 3931 int ret; 3932 struct ocfs2_xattr_block *xb = 3933 (struct ocfs2_xattr_block *)root_bh->b_data; 3934 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 3935 struct ocfs2_extent_list *el = &xb_root->xt_list; 3936 u64 p_blkno = 0; 3937 u32 first_hash, num_clusters = 0; 3938 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 3939 3940 if (le16_to_cpu(el->l_next_free_rec) == 0) 3941 return -ENODATA; 3942 3943 trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno, 3944 name, name_index, name_hash, 3945 (unsigned long long)root_bh->b_blocknr, 3946 -1); 3947 3948 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3949 &num_clusters, el); 3950 if (ret) { 3951 mlog_errno(ret); 3952 goto out; 3953 } 3954 3955 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 3956 3957 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno, 3958 name, name_index, first_hash, 3959 (unsigned long long)p_blkno, 3960 num_clusters); 3961 3962 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3963 p_blkno, first_hash, num_clusters, xs); 3964 3965 out: 3966 return ret; 3967 } 3968 3969 static int ocfs2_iterate_xattr_buckets(struct inode *inode, 3970 u64 blkno, 3971 u32 clusters, 3972 xattr_bucket_func *func, 3973 void *para) 3974 { 3975 int i, ret = 0; 3976 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3977 u32 num_buckets = clusters * bpc; 3978 struct ocfs2_xattr_bucket *bucket; 3979 3980 bucket = ocfs2_xattr_bucket_new(inode); 3981 if (!bucket) { 3982 mlog_errno(-ENOMEM); 3983 return -ENOMEM; 3984 } 3985 3986 trace_ocfs2_iterate_xattr_buckets( 3987 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3988 (unsigned long long)blkno, clusters); 3989 3990 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 3991 ret = ocfs2_read_xattr_bucket(bucket, blkno); 3992 if (ret) { 3993 mlog_errno(ret); 3994 break; 3995 } 3996 3997 /* 3998 * The real bucket num in this series of blocks is stored 3999 * in the 1st bucket. 4000 */ 4001 if (i == 0) 4002 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 4003 4004 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno, 4005 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 4006 if (func) { 4007 ret = func(inode, bucket, para); 4008 if (ret && ret != -ERANGE) 4009 mlog_errno(ret); 4010 /* Fall through to bucket_relse() */ 4011 } 4012 4013 ocfs2_xattr_bucket_relse(bucket); 4014 if (ret) 4015 break; 4016 } 4017 4018 ocfs2_xattr_bucket_free(bucket); 4019 return ret; 4020 } 4021 4022 struct ocfs2_xattr_tree_list { 4023 char *buffer; 4024 size_t buffer_size; 4025 size_t result; 4026 }; 4027 4028 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 4029 struct ocfs2_xattr_header *xh, 4030 int index, 4031 int *block_off, 4032 int *new_offset) 4033 { 4034 u16 name_offset; 4035 4036 if (index < 0 || index >= le16_to_cpu(xh->xh_count)) 4037 return -EINVAL; 4038 4039 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 4040 4041 *block_off = name_offset >> sb->s_blocksize_bits; 4042 *new_offset = name_offset % sb->s_blocksize; 4043 4044 return 0; 4045 } 4046 4047 static int ocfs2_list_xattr_bucket(struct inode *inode, 4048 struct ocfs2_xattr_bucket *bucket, 4049 void *para) 4050 { 4051 int ret = 0, type; 4052 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; 4053 int i, block_off, new_offset; 4054 const char *name; 4055 4056 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { 4057 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; 4058 type = ocfs2_xattr_get_type(entry); 4059 4060 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 4061 bucket_xh(bucket), 4062 i, 4063 &block_off, 4064 &new_offset); 4065 if (ret) 4066 break; 4067 4068 name = (const char *)bucket_block(bucket, block_off) + 4069 new_offset; 4070 ret = ocfs2_xattr_list_entry(inode->i_sb, 4071 xl->buffer, 4072 xl->buffer_size, 4073 &xl->result, 4074 type, name, 4075 entry->xe_name_len); 4076 if (ret) 4077 break; 4078 } 4079 4080 return ret; 4081 } 4082 4083 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 4084 struct buffer_head *blk_bh, 4085 xattr_tree_rec_func *rec_func, 4086 void *para) 4087 { 4088 struct ocfs2_xattr_block *xb = 4089 (struct ocfs2_xattr_block *)blk_bh->b_data; 4090 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; 4091 int ret = 0; 4092 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 4093 u64 p_blkno = 0; 4094 4095 if (!el->l_next_free_rec || !rec_func) 4096 return 0; 4097 4098 while (name_hash > 0) { 4099 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 4100 &e_cpos, &num_clusters, el); 4101 if (ret) { 4102 mlog_errno(ret); 4103 break; 4104 } 4105 4106 ret = rec_func(inode, blk_bh, p_blkno, e_cpos, 4107 num_clusters, para); 4108 if (ret) { 4109 if (ret != -ERANGE) 4110 mlog_errno(ret); 4111 break; 4112 } 4113 4114 if (e_cpos == 0) 4115 break; 4116 4117 name_hash = e_cpos - 1; 4118 } 4119 4120 return ret; 4121 4122 } 4123 4124 static int ocfs2_list_xattr_tree_rec(struct inode *inode, 4125 struct buffer_head *root_bh, 4126 u64 blkno, u32 cpos, u32 len, void *para) 4127 { 4128 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 4129 ocfs2_list_xattr_bucket, para); 4130 } 4131 4132 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 4133 struct buffer_head *blk_bh, 4134 char *buffer, 4135 size_t buffer_size) 4136 { 4137 int ret; 4138 struct ocfs2_xattr_tree_list xl = { 4139 .buffer = buffer, 4140 .buffer_size = buffer_size, 4141 .result = 0, 4142 }; 4143 4144 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 4145 ocfs2_list_xattr_tree_rec, &xl); 4146 if (ret) { 4147 mlog_errno(ret); 4148 goto out; 4149 } 4150 4151 ret = xl.result; 4152 out: 4153 return ret; 4154 } 4155 4156 static int cmp_xe(const void *a, const void *b) 4157 { 4158 const struct ocfs2_xattr_entry *l = a, *r = b; 4159 u32 l_hash = le32_to_cpu(l->xe_name_hash); 4160 u32 r_hash = le32_to_cpu(r->xe_name_hash); 4161 4162 if (l_hash > r_hash) 4163 return 1; 4164 if (l_hash < r_hash) 4165 return -1; 4166 return 0; 4167 } 4168 4169 static void swap_xe(void *a, void *b, int size) 4170 { 4171 struct ocfs2_xattr_entry *l = a, *r = b, tmp; 4172 4173 tmp = *l; 4174 memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); 4175 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); 4176 } 4177 4178 /* 4179 * When the ocfs2_xattr_block is filled up, new bucket will be created 4180 * and all the xattr entries will be moved to the new bucket. 4181 * The header goes at the start of the bucket, and the names+values are 4182 * filled from the end. This is why *target starts as the last buffer. 4183 * Note: we need to sort the entries since they are not saved in order 4184 * in the ocfs2_xattr_block. 4185 */ 4186 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 4187 struct buffer_head *xb_bh, 4188 struct ocfs2_xattr_bucket *bucket) 4189 { 4190 int i, blocksize = inode->i_sb->s_blocksize; 4191 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4192 u16 offset, size, off_change; 4193 struct ocfs2_xattr_entry *xe; 4194 struct ocfs2_xattr_block *xb = 4195 (struct ocfs2_xattr_block *)xb_bh->b_data; 4196 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 4197 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 4198 u16 count = le16_to_cpu(xb_xh->xh_count); 4199 char *src = xb_bh->b_data; 4200 char *target = bucket_block(bucket, blks - 1); 4201 4202 trace_ocfs2_cp_xattr_block_to_bucket_begin( 4203 (unsigned long long)xb_bh->b_blocknr, 4204 (unsigned long long)bucket_blkno(bucket)); 4205 4206 for (i = 0; i < blks; i++) 4207 memset(bucket_block(bucket, i), 0, blocksize); 4208 4209 /* 4210 * Since the xe_name_offset is based on ocfs2_xattr_header, 4211 * there is a offset change corresponding to the change of 4212 * ocfs2_xattr_header's position. 4213 */ 4214 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4215 xe = &xb_xh->xh_entries[count - 1]; 4216 offset = le16_to_cpu(xe->xe_name_offset) + off_change; 4217 size = blocksize - offset; 4218 4219 /* copy all the names and values. */ 4220 memcpy(target + offset, src + offset, size); 4221 4222 /* Init new header now. */ 4223 xh->xh_count = xb_xh->xh_count; 4224 xh->xh_num_buckets = cpu_to_le16(1); 4225 xh->xh_name_value_len = cpu_to_le16(size); 4226 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 4227 4228 /* copy all the entries. */ 4229 target = bucket_block(bucket, 0); 4230 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 4231 size = count * sizeof(struct ocfs2_xattr_entry); 4232 memcpy(target + offset, (char *)xb_xh + offset, size); 4233 4234 /* Change the xe offset for all the xe because of the move. */ 4235 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + 4236 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4237 for (i = 0; i < count; i++) 4238 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4239 4240 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); 4241 4242 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4243 cmp_xe, swap_xe); 4244 } 4245 4246 /* 4247 * After we move xattr from block to index btree, we have to 4248 * update ocfs2_xattr_search to the new xe and base. 4249 * 4250 * When the entry is in xattr block, xattr_bh indicates the storage place. 4251 * While if the entry is in index b-tree, "bucket" indicates the 4252 * real place of the xattr. 4253 */ 4254 static void ocfs2_xattr_update_xattr_search(struct inode *inode, 4255 struct ocfs2_xattr_search *xs, 4256 struct buffer_head *old_bh) 4257 { 4258 char *buf = old_bh->b_data; 4259 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 4260 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 4261 int i; 4262 4263 xs->header = bucket_xh(xs->bucket); 4264 xs->base = bucket_block(xs->bucket, 0); 4265 xs->end = xs->base + inode->i_sb->s_blocksize; 4266 4267 if (xs->not_found) 4268 return; 4269 4270 i = xs->here - old_xh->xh_entries; 4271 xs->here = &xs->header->xh_entries[i]; 4272 } 4273 4274 static int ocfs2_xattr_create_index_block(struct inode *inode, 4275 struct ocfs2_xattr_search *xs, 4276 struct ocfs2_xattr_set_ctxt *ctxt) 4277 { 4278 int ret; 4279 u32 bit_off, len; 4280 u64 blkno; 4281 handle_t *handle = ctxt->handle; 4282 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4283 struct buffer_head *xb_bh = xs->xattr_bh; 4284 struct ocfs2_xattr_block *xb = 4285 (struct ocfs2_xattr_block *)xb_bh->b_data; 4286 struct ocfs2_xattr_tree_root *xr; 4287 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4288 4289 trace_ocfs2_xattr_create_index_block_begin( 4290 (unsigned long long)xb_bh->b_blocknr); 4291 4292 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4293 BUG_ON(!xs->bucket); 4294 4295 /* 4296 * XXX: 4297 * We can use this lock for now, and maybe move to a dedicated mutex 4298 * if performance becomes a problem later. 4299 */ 4300 down_write(&oi->ip_alloc_sem); 4301 4302 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, 4303 OCFS2_JOURNAL_ACCESS_WRITE); 4304 if (ret) { 4305 mlog_errno(ret); 4306 goto out; 4307 } 4308 4309 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 4310 1, 1, &bit_off, &len); 4311 if (ret) { 4312 mlog_errno(ret); 4313 goto out; 4314 } 4315 4316 /* 4317 * The bucket may spread in many blocks, and 4318 * we will only touch the 1st block and the last block 4319 * in the whole bucket(one for entry and one for data). 4320 */ 4321 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4322 4323 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); 4324 4325 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1); 4326 if (ret) { 4327 mlog_errno(ret); 4328 goto out; 4329 } 4330 4331 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 4332 OCFS2_JOURNAL_ACCESS_CREATE); 4333 if (ret) { 4334 mlog_errno(ret); 4335 goto out; 4336 } 4337 4338 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); 4339 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 4340 4341 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); 4342 4343 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 4344 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 4345 offsetof(struct ocfs2_xattr_block, xb_attrs)); 4346 4347 xr = &xb->xb_attrs.xb_root; 4348 xr->xt_clusters = cpu_to_le32(1); 4349 xr->xt_last_eb_blk = 0; 4350 xr->xt_list.l_tree_depth = 0; 4351 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); 4352 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 4353 4354 xr->xt_list.l_recs[0].e_cpos = 0; 4355 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 4356 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 4357 4358 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 4359 4360 ocfs2_journal_dirty(handle, xb_bh); 4361 4362 out: 4363 up_write(&oi->ip_alloc_sem); 4364 4365 return ret; 4366 } 4367 4368 static int cmp_xe_offset(const void *a, const void *b) 4369 { 4370 const struct ocfs2_xattr_entry *l = a, *r = b; 4371 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4372 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4373 4374 if (l_name_offset < r_name_offset) 4375 return 1; 4376 if (l_name_offset > r_name_offset) 4377 return -1; 4378 return 0; 4379 } 4380 4381 /* 4382 * defrag a xattr bucket if we find that the bucket has some 4383 * holes beteen name/value pairs. 4384 * We will move all the name/value pairs to the end of the bucket 4385 * so that we can spare some space for insertion. 4386 */ 4387 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4388 handle_t *handle, 4389 struct ocfs2_xattr_bucket *bucket) 4390 { 4391 int ret, i; 4392 size_t end, offset, len; 4393 struct ocfs2_xattr_header *xh; 4394 char *entries, *buf, *bucket_buf = NULL; 4395 u64 blkno = bucket_blkno(bucket); 4396 u16 xh_free_start; 4397 size_t blocksize = inode->i_sb->s_blocksize; 4398 struct ocfs2_xattr_entry *xe; 4399 4400 /* 4401 * In order to make the operation more efficient and generic, 4402 * we copy all the blocks into a contiguous memory and do the 4403 * defragment there, so if anything is error, we will not touch 4404 * the real block. 4405 */ 4406 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4407 if (!bucket_buf) { 4408 ret = -EIO; 4409 goto out; 4410 } 4411 4412 buf = bucket_buf; 4413 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4414 memcpy(buf, bucket_block(bucket, i), blocksize); 4415 4416 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4417 OCFS2_JOURNAL_ACCESS_WRITE); 4418 if (ret < 0) { 4419 mlog_errno(ret); 4420 goto out; 4421 } 4422 4423 xh = (struct ocfs2_xattr_header *)bucket_buf; 4424 entries = (char *)xh->xh_entries; 4425 xh_free_start = le16_to_cpu(xh->xh_free_start); 4426 4427 trace_ocfs2_defrag_xattr_bucket( 4428 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4429 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4430 4431 /* 4432 * sort all the entries by their offset. 4433 * the largest will be the first, so that we can 4434 * move them to the end one by one. 4435 */ 4436 sort(entries, le16_to_cpu(xh->xh_count), 4437 sizeof(struct ocfs2_xattr_entry), 4438 cmp_xe_offset, swap_xe); 4439 4440 /* Move all name/values to the end of the bucket. */ 4441 xe = xh->xh_entries; 4442 end = OCFS2_XATTR_BUCKET_SIZE; 4443 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4444 offset = le16_to_cpu(xe->xe_name_offset); 4445 len = namevalue_size_xe(xe); 4446 4447 /* 4448 * We must make sure that the name/value pair 4449 * exist in the same block. So adjust end to 4450 * the previous block end if needed. 4451 */ 4452 if (((end - len) / blocksize != 4453 (end - 1) / blocksize)) 4454 end = end - end % blocksize; 4455 4456 if (end > offset + len) { 4457 memmove(bucket_buf + end - len, 4458 bucket_buf + offset, len); 4459 xe->xe_name_offset = cpu_to_le16(end - len); 4460 } 4461 4462 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4463 "bucket %llu\n", (unsigned long long)blkno); 4464 4465 end -= len; 4466 } 4467 4468 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4469 "bucket %llu\n", (unsigned long long)blkno); 4470 4471 if (xh_free_start == end) 4472 goto out; 4473 4474 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4475 xh->xh_free_start = cpu_to_le16(end); 4476 4477 /* sort the entries by their name_hash. */ 4478 sort(entries, le16_to_cpu(xh->xh_count), 4479 sizeof(struct ocfs2_xattr_entry), 4480 cmp_xe, swap_xe); 4481 4482 buf = bucket_buf; 4483 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4484 memcpy(bucket_block(bucket, i), buf, blocksize); 4485 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4486 4487 out: 4488 kfree(bucket_buf); 4489 return ret; 4490 } 4491 4492 /* 4493 * prev_blkno points to the start of an existing extent. new_blkno 4494 * points to a newly allocated extent. Because we know each of our 4495 * clusters contains more than bucket, we can easily split one cluster 4496 * at a bucket boundary. So we take the last cluster of the existing 4497 * extent and split it down the middle. We move the last half of the 4498 * buckets in the last cluster of the existing extent over to the new 4499 * extent. 4500 * 4501 * first_bh is the buffer at prev_blkno so we can update the existing 4502 * extent's bucket count. header_bh is the bucket were we were hoping 4503 * to insert our xattr. If the bucket move places the target in the new 4504 * extent, we'll update first_bh and header_bh after modifying the old 4505 * extent. 4506 * 4507 * first_hash will be set as the 1st xe's name_hash in the new extent. 4508 */ 4509 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 4510 handle_t *handle, 4511 struct ocfs2_xattr_bucket *first, 4512 struct ocfs2_xattr_bucket *target, 4513 u64 new_blkno, 4514 u32 num_clusters, 4515 u32 *first_hash) 4516 { 4517 int ret; 4518 struct super_block *sb = inode->i_sb; 4519 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); 4520 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); 4521 int to_move = num_buckets / 2; 4522 u64 src_blkno; 4523 u64 last_cluster_blkno = bucket_blkno(first) + 4524 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); 4525 4526 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4527 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4528 4529 trace_ocfs2_mv_xattr_bucket_cross_cluster( 4530 (unsigned long long)last_cluster_blkno, 4531 (unsigned long long)new_blkno); 4532 4533 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4534 last_cluster_blkno, new_blkno, 4535 to_move, first_hash); 4536 if (ret) { 4537 mlog_errno(ret); 4538 goto out; 4539 } 4540 4541 /* This is the first bucket that got moved */ 4542 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); 4543 4544 /* 4545 * If the target bucket was part of the moved buckets, we need to 4546 * update first and target. 4547 */ 4548 if (bucket_blkno(target) >= src_blkno) { 4549 /* Find the block for the new target bucket */ 4550 src_blkno = new_blkno + 4551 (bucket_blkno(target) - src_blkno); 4552 4553 ocfs2_xattr_bucket_relse(first); 4554 ocfs2_xattr_bucket_relse(target); 4555 4556 /* 4557 * These shouldn't fail - the buffers are in the 4558 * journal from ocfs2_cp_xattr_bucket(). 4559 */ 4560 ret = ocfs2_read_xattr_bucket(first, new_blkno); 4561 if (ret) { 4562 mlog_errno(ret); 4563 goto out; 4564 } 4565 ret = ocfs2_read_xattr_bucket(target, src_blkno); 4566 if (ret) 4567 mlog_errno(ret); 4568 4569 } 4570 4571 out: 4572 return ret; 4573 } 4574 4575 /* 4576 * Find the suitable pos when we divide a bucket into 2. 4577 * We have to make sure the xattrs with the same hash value exist 4578 * in the same bucket. 4579 * 4580 * If this ocfs2_xattr_header covers more than one hash value, find a 4581 * place where the hash value changes. Try to find the most even split. 4582 * The most common case is that all entries have different hash values, 4583 * and the first check we make will find a place to split. 4584 */ 4585 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) 4586 { 4587 struct ocfs2_xattr_entry *entries = xh->xh_entries; 4588 int count = le16_to_cpu(xh->xh_count); 4589 int delta, middle = count / 2; 4590 4591 /* 4592 * We start at the middle. Each step gets farther away in both 4593 * directions. We therefore hit the change in hash value 4594 * nearest to the middle. Note that this loop does not execute for 4595 * count < 2. 4596 */ 4597 for (delta = 0; delta < middle; delta++) { 4598 /* Let's check delta earlier than middle */ 4599 if (cmp_xe(&entries[middle - delta - 1], 4600 &entries[middle - delta])) 4601 return middle - delta; 4602 4603 /* For even counts, don't walk off the end */ 4604 if ((middle + delta + 1) == count) 4605 continue; 4606 4607 /* Now try delta past middle */ 4608 if (cmp_xe(&entries[middle + delta], 4609 &entries[middle + delta + 1])) 4610 return middle + delta + 1; 4611 } 4612 4613 /* Every entry had the same hash */ 4614 return count; 4615 } 4616 4617 /* 4618 * Move some xattrs in old bucket(blk) to new bucket(new_blk). 4619 * first_hash will record the 1st hash of the new bucket. 4620 * 4621 * Normally half of the xattrs will be moved. But we have to make 4622 * sure that the xattrs with the same hash value are stored in the 4623 * same bucket. If all the xattrs in this bucket have the same hash 4624 * value, the new bucket will be initialized as an empty one and the 4625 * first_hash will be initialized as (hash_value+1). 4626 */ 4627 static int ocfs2_divide_xattr_bucket(struct inode *inode, 4628 handle_t *handle, 4629 u64 blk, 4630 u64 new_blk, 4631 u32 *first_hash, 4632 int new_bucket_head) 4633 { 4634 int ret, i; 4635 int count, start, len, name_value_len = 0, name_offset = 0; 4636 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4637 struct ocfs2_xattr_header *xh; 4638 struct ocfs2_xattr_entry *xe; 4639 int blocksize = inode->i_sb->s_blocksize; 4640 4641 trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk, 4642 (unsigned long long)new_blk); 4643 4644 s_bucket = ocfs2_xattr_bucket_new(inode); 4645 t_bucket = ocfs2_xattr_bucket_new(inode); 4646 if (!s_bucket || !t_bucket) { 4647 ret = -ENOMEM; 4648 mlog_errno(ret); 4649 goto out; 4650 } 4651 4652 ret = ocfs2_read_xattr_bucket(s_bucket, blk); 4653 if (ret) { 4654 mlog_errno(ret); 4655 goto out; 4656 } 4657 4658 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, 4659 OCFS2_JOURNAL_ACCESS_WRITE); 4660 if (ret) { 4661 mlog_errno(ret); 4662 goto out; 4663 } 4664 4665 /* 4666 * Even if !new_bucket_head, we're overwriting t_bucket. Thus, 4667 * there's no need to read it. 4668 */ 4669 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head); 4670 if (ret) { 4671 mlog_errno(ret); 4672 goto out; 4673 } 4674 4675 /* 4676 * Hey, if we're overwriting t_bucket, what difference does 4677 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the 4678 * same part of ocfs2_cp_xattr_bucket(). 4679 */ 4680 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4681 new_bucket_head ? 4682 OCFS2_JOURNAL_ACCESS_CREATE : 4683 OCFS2_JOURNAL_ACCESS_WRITE); 4684 if (ret) { 4685 mlog_errno(ret); 4686 goto out; 4687 } 4688 4689 xh = bucket_xh(s_bucket); 4690 count = le16_to_cpu(xh->xh_count); 4691 start = ocfs2_xattr_find_divide_pos(xh); 4692 4693 if (start == count) { 4694 xe = &xh->xh_entries[start-1]; 4695 4696 /* 4697 * initialized a new empty bucket here. 4698 * The hash value is set as one larger than 4699 * that of the last entry in the previous bucket. 4700 */ 4701 for (i = 0; i < t_bucket->bu_blocks; i++) 4702 memset(bucket_block(t_bucket, i), 0, blocksize); 4703 4704 xh = bucket_xh(t_bucket); 4705 xh->xh_free_start = cpu_to_le16(blocksize); 4706 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 4707 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 4708 4709 goto set_num_buckets; 4710 } 4711 4712 /* copy the whole bucket to the new first. */ 4713 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4714 4715 /* update the new bucket. */ 4716 xh = bucket_xh(t_bucket); 4717 4718 /* 4719 * Calculate the total name/value len and xh_free_start for 4720 * the old bucket first. 4721 */ 4722 name_offset = OCFS2_XATTR_BUCKET_SIZE; 4723 name_value_len = 0; 4724 for (i = 0; i < start; i++) { 4725 xe = &xh->xh_entries[i]; 4726 name_value_len += namevalue_size_xe(xe); 4727 if (le16_to_cpu(xe->xe_name_offset) < name_offset) 4728 name_offset = le16_to_cpu(xe->xe_name_offset); 4729 } 4730 4731 /* 4732 * Now begin the modification to the new bucket. 4733 * 4734 * In the new bucket, We just move the xattr entry to the beginning 4735 * and don't touch the name/value. So there will be some holes in the 4736 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is 4737 * called. 4738 */ 4739 xe = &xh->xh_entries[start]; 4740 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4741 trace_ocfs2_divide_xattr_bucket_move(len, 4742 (int)((char *)xe - (char *)xh), 4743 (int)((char *)xh->xh_entries - (char *)xh)); 4744 memmove((char *)xh->xh_entries, (char *)xe, len); 4745 xe = &xh->xh_entries[count - start]; 4746 len = sizeof(struct ocfs2_xattr_entry) * start; 4747 memset((char *)xe, 0, len); 4748 4749 le16_add_cpu(&xh->xh_count, -start); 4750 le16_add_cpu(&xh->xh_name_value_len, -name_value_len); 4751 4752 /* Calculate xh_free_start for the new bucket. */ 4753 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 4754 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 4755 xe = &xh->xh_entries[i]; 4756 if (le16_to_cpu(xe->xe_name_offset) < 4757 le16_to_cpu(xh->xh_free_start)) 4758 xh->xh_free_start = xe->xe_name_offset; 4759 } 4760 4761 set_num_buckets: 4762 /* set xh->xh_num_buckets for the new xh. */ 4763 if (new_bucket_head) 4764 xh->xh_num_buckets = cpu_to_le16(1); 4765 else 4766 xh->xh_num_buckets = 0; 4767 4768 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4769 4770 /* store the first_hash of the new bucket. */ 4771 if (first_hash) 4772 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4773 4774 /* 4775 * Now only update the 1st block of the old bucket. If we 4776 * just added a new empty bucket, there is no need to modify 4777 * it. 4778 */ 4779 if (start == count) 4780 goto out; 4781 4782 xh = bucket_xh(s_bucket); 4783 memset(&xh->xh_entries[start], 0, 4784 sizeof(struct ocfs2_xattr_entry) * (count - start)); 4785 xh->xh_count = cpu_to_le16(start); 4786 xh->xh_free_start = cpu_to_le16(name_offset); 4787 xh->xh_name_value_len = cpu_to_le16(name_value_len); 4788 4789 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); 4790 4791 out: 4792 ocfs2_xattr_bucket_free(s_bucket); 4793 ocfs2_xattr_bucket_free(t_bucket); 4794 4795 return ret; 4796 } 4797 4798 /* 4799 * Copy xattr from one bucket to another bucket. 4800 * 4801 * The caller must make sure that the journal transaction 4802 * has enough space for journaling. 4803 */ 4804 static int ocfs2_cp_xattr_bucket(struct inode *inode, 4805 handle_t *handle, 4806 u64 s_blkno, 4807 u64 t_blkno, 4808 int t_is_new) 4809 { 4810 int ret; 4811 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4812 4813 BUG_ON(s_blkno == t_blkno); 4814 4815 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno, 4816 (unsigned long long)t_blkno, 4817 t_is_new); 4818 4819 s_bucket = ocfs2_xattr_bucket_new(inode); 4820 t_bucket = ocfs2_xattr_bucket_new(inode); 4821 if (!s_bucket || !t_bucket) { 4822 ret = -ENOMEM; 4823 mlog_errno(ret); 4824 goto out; 4825 } 4826 4827 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); 4828 if (ret) 4829 goto out; 4830 4831 /* 4832 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4833 * there's no need to read it. 4834 */ 4835 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new); 4836 if (ret) 4837 goto out; 4838 4839 /* 4840 * Hey, if we're overwriting t_bucket, what difference does 4841 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new 4842 * cluster to fill, we came here from 4843 * ocfs2_mv_xattr_buckets(), and it is really new - 4844 * ACCESS_CREATE is required. But we also might have moved data 4845 * out of t_bucket before extending back into it. 4846 * ocfs2_add_new_xattr_bucket() can do this - its call to 4847 * ocfs2_add_new_xattr_cluster() may have created a new extent 4848 * and copied out the end of the old extent. Then it re-extends 4849 * the old extent back to create space for new xattrs. That's 4850 * how we get here, and the bucket isn't really new. 4851 */ 4852 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4853 t_is_new ? 4854 OCFS2_JOURNAL_ACCESS_CREATE : 4855 OCFS2_JOURNAL_ACCESS_WRITE); 4856 if (ret) 4857 goto out; 4858 4859 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4860 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4861 4862 out: 4863 ocfs2_xattr_bucket_free(t_bucket); 4864 ocfs2_xattr_bucket_free(s_bucket); 4865 4866 return ret; 4867 } 4868 4869 /* 4870 * src_blk points to the start of an existing extent. last_blk points to 4871 * last cluster in that extent. to_blk points to a newly allocated 4872 * extent. We copy the buckets from the cluster at last_blk to the new 4873 * extent. If start_bucket is non-zero, we skip that many buckets before 4874 * we start copying. The new extent's xh_num_buckets gets set to the 4875 * number of buckets we copied. The old extent's xh_num_buckets shrinks 4876 * by the same amount. 4877 */ 4878 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 4879 u64 src_blk, u64 last_blk, u64 to_blk, 4880 unsigned int start_bucket, 4881 u32 *first_hash) 4882 { 4883 int i, ret, credits; 4884 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4885 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4886 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4887 struct ocfs2_xattr_bucket *old_first, *new_first; 4888 4889 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk, 4890 (unsigned long long)to_blk); 4891 4892 BUG_ON(start_bucket >= num_buckets); 4893 if (start_bucket) { 4894 num_buckets -= start_bucket; 4895 last_blk += (start_bucket * blks_per_bucket); 4896 } 4897 4898 /* The first bucket of the original extent */ 4899 old_first = ocfs2_xattr_bucket_new(inode); 4900 /* The first bucket of the new extent */ 4901 new_first = ocfs2_xattr_bucket_new(inode); 4902 if (!old_first || !new_first) { 4903 ret = -ENOMEM; 4904 mlog_errno(ret); 4905 goto out; 4906 } 4907 4908 ret = ocfs2_read_xattr_bucket(old_first, src_blk); 4909 if (ret) { 4910 mlog_errno(ret); 4911 goto out; 4912 } 4913 4914 /* 4915 * We need to update the first bucket of the old extent and all 4916 * the buckets going to the new extent. 4917 */ 4918 credits = ((num_buckets + 1) * blks_per_bucket); 4919 ret = ocfs2_extend_trans(handle, credits); 4920 if (ret) { 4921 mlog_errno(ret); 4922 goto out; 4923 } 4924 4925 ret = ocfs2_xattr_bucket_journal_access(handle, old_first, 4926 OCFS2_JOURNAL_ACCESS_WRITE); 4927 if (ret) { 4928 mlog_errno(ret); 4929 goto out; 4930 } 4931 4932 for (i = 0; i < num_buckets; i++) { 4933 ret = ocfs2_cp_xattr_bucket(inode, handle, 4934 last_blk + (i * blks_per_bucket), 4935 to_blk + (i * blks_per_bucket), 4936 1); 4937 if (ret) { 4938 mlog_errno(ret); 4939 goto out; 4940 } 4941 } 4942 4943 /* 4944 * Get the new bucket ready before we dirty anything 4945 * (This actually shouldn't fail, because we already dirtied 4946 * it once in ocfs2_cp_xattr_bucket()). 4947 */ 4948 ret = ocfs2_read_xattr_bucket(new_first, to_blk); 4949 if (ret) { 4950 mlog_errno(ret); 4951 goto out; 4952 } 4953 ret = ocfs2_xattr_bucket_journal_access(handle, new_first, 4954 OCFS2_JOURNAL_ACCESS_WRITE); 4955 if (ret) { 4956 mlog_errno(ret); 4957 goto out; 4958 } 4959 4960 /* Now update the headers */ 4961 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); 4962 ocfs2_xattr_bucket_journal_dirty(handle, old_first); 4963 4964 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); 4965 ocfs2_xattr_bucket_journal_dirty(handle, new_first); 4966 4967 if (first_hash) 4968 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); 4969 4970 out: 4971 ocfs2_xattr_bucket_free(new_first); 4972 ocfs2_xattr_bucket_free(old_first); 4973 return ret; 4974 } 4975 4976 /* 4977 * Move some xattrs in this cluster to the new cluster. 4978 * This function should only be called when bucket size == cluster size. 4979 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 4980 */ 4981 static int ocfs2_divide_xattr_cluster(struct inode *inode, 4982 handle_t *handle, 4983 u64 prev_blk, 4984 u64 new_blk, 4985 u32 *first_hash) 4986 { 4987 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4988 int ret, credits = 2 * blk_per_bucket; 4989 4990 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4991 4992 ret = ocfs2_extend_trans(handle, credits); 4993 if (ret) { 4994 mlog_errno(ret); 4995 return ret; 4996 } 4997 4998 /* Move half of the xattr in start_blk to the next bucket. */ 4999 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, 5000 new_blk, first_hash, 1); 5001 } 5002 5003 /* 5004 * Move some xattrs from the old cluster to the new one since they are not 5005 * contiguous in ocfs2 xattr tree. 5006 * 5007 * new_blk starts a new separate cluster, and we will move some xattrs from 5008 * prev_blk to it. v_start will be set as the first name hash value in this 5009 * new cluster so that it can be used as e_cpos during tree insertion and 5010 * don't collide with our original b-tree operations. first_bh and header_bh 5011 * will also be updated since they will be used in ocfs2_extend_xattr_bucket 5012 * to extend the insert bucket. 5013 * 5014 * The problem is how much xattr should we move to the new one and when should 5015 * we update first_bh and header_bh? 5016 * 1. If cluster size > bucket size, that means the previous cluster has more 5017 * than 1 bucket, so just move half nums of bucket into the new cluster and 5018 * update the first_bh and header_bh if the insert bucket has been moved 5019 * to the new cluster. 5020 * 2. If cluster_size == bucket_size: 5021 * a) If the previous extent rec has more than one cluster and the insert 5022 * place isn't in the last cluster, copy the entire last cluster to the 5023 * new one. This time, we don't need to upate the first_bh and header_bh 5024 * since they will not be moved into the new cluster. 5025 * b) Otherwise, move the bottom half of the xattrs in the last cluster into 5026 * the new one. And we set the extend flag to zero if the insert place is 5027 * moved into the new allocated cluster since no extend is needed. 5028 */ 5029 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 5030 handle_t *handle, 5031 struct ocfs2_xattr_bucket *first, 5032 struct ocfs2_xattr_bucket *target, 5033 u64 new_blk, 5034 u32 prev_clusters, 5035 u32 *v_start, 5036 int *extend) 5037 { 5038 int ret; 5039 5040 trace_ocfs2_adjust_xattr_cross_cluster( 5041 (unsigned long long)bucket_blkno(first), 5042 (unsigned long long)new_blk, prev_clusters); 5043 5044 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5045 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5046 handle, 5047 first, target, 5048 new_blk, 5049 prev_clusters, 5050 v_start); 5051 if (ret) 5052 mlog_errno(ret); 5053 } else { 5054 /* The start of the last cluster in the first extent */ 5055 u64 last_blk = bucket_blkno(first) + 5056 ((prev_clusters - 1) * 5057 ocfs2_clusters_to_blocks(inode->i_sb, 1)); 5058 5059 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { 5060 ret = ocfs2_mv_xattr_buckets(inode, handle, 5061 bucket_blkno(first), 5062 last_blk, new_blk, 0, 5063 v_start); 5064 if (ret) 5065 mlog_errno(ret); 5066 } else { 5067 ret = ocfs2_divide_xattr_cluster(inode, handle, 5068 last_blk, new_blk, 5069 v_start); 5070 if (ret) 5071 mlog_errno(ret); 5072 5073 if ((bucket_blkno(target) == last_blk) && extend) 5074 *extend = 0; 5075 } 5076 } 5077 5078 return ret; 5079 } 5080 5081 /* 5082 * Add a new cluster for xattr storage. 5083 * 5084 * If the new cluster is contiguous with the previous one, it will be 5085 * appended to the same extent record, and num_clusters will be updated. 5086 * If not, we will insert a new extent for it and move some xattrs in 5087 * the last cluster into the new allocated one. 5088 * We also need to limit the maximum size of a btree leaf, otherwise we'll 5089 * lose the benefits of hashing because we'll have to search large leaves. 5090 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, 5091 * if it's bigger). 5092 * 5093 * first_bh is the first block of the previous extent rec and header_bh 5094 * indicates the bucket we will insert the new xattrs. They will be updated 5095 * when the header_bh is moved into the new cluster. 5096 */ 5097 static int ocfs2_add_new_xattr_cluster(struct inode *inode, 5098 struct buffer_head *root_bh, 5099 struct ocfs2_xattr_bucket *first, 5100 struct ocfs2_xattr_bucket *target, 5101 u32 *num_clusters, 5102 u32 prev_cpos, 5103 int *extend, 5104 struct ocfs2_xattr_set_ctxt *ctxt) 5105 { 5106 int ret; 5107 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 5108 u32 prev_clusters = *num_clusters; 5109 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 5110 u64 block; 5111 handle_t *handle = ctxt->handle; 5112 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5113 struct ocfs2_extent_tree et; 5114 5115 trace_ocfs2_add_new_xattr_cluster_begin( 5116 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5117 (unsigned long long)bucket_blkno(first), 5118 prev_cpos, prev_clusters); 5119 5120 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5121 5122 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5123 OCFS2_JOURNAL_ACCESS_WRITE); 5124 if (ret < 0) { 5125 mlog_errno(ret); 5126 goto leave; 5127 } 5128 5129 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1, 5130 clusters_to_add, &bit_off, &num_bits); 5131 if (ret < 0) { 5132 if (ret != -ENOSPC) 5133 mlog_errno(ret); 5134 goto leave; 5135 } 5136 5137 BUG_ON(num_bits > clusters_to_add); 5138 5139 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5140 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits); 5141 5142 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5143 (prev_clusters + num_bits) << osb->s_clustersize_bits <= 5144 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 5145 /* 5146 * If this cluster is contiguous with the old one and 5147 * adding this new cluster, we don't surpass the limit of 5148 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be 5149 * initialized and used like other buckets in the previous 5150 * cluster. 5151 * So add it as a contiguous one. The caller will handle 5152 * its init process. 5153 */ 5154 v_start = prev_cpos + prev_clusters; 5155 *num_clusters = prev_clusters + num_bits; 5156 } else { 5157 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5158 handle, 5159 first, 5160 target, 5161 block, 5162 prev_clusters, 5163 &v_start, 5164 extend); 5165 if (ret) { 5166 mlog_errno(ret); 5167 goto leave; 5168 } 5169 } 5170 5171 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block, 5172 v_start, num_bits); 5173 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5174 num_bits, 0, ctxt->meta_ac); 5175 if (ret < 0) { 5176 mlog_errno(ret); 5177 goto leave; 5178 } 5179 5180 ocfs2_journal_dirty(handle, root_bh); 5181 5182 leave: 5183 return ret; 5184 } 5185 5186 /* 5187 * We are given an extent. 'first' is the bucket at the very front of 5188 * the extent. The extent has space for an additional bucket past 5189 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number 5190 * of the target bucket. We wish to shift every bucket past the target 5191 * down one, filling in that additional space. When we get back to the 5192 * target, we split the target between itself and the now-empty bucket 5193 * at target+1 (aka, target_blkno + blks_per_bucket). 5194 */ 5195 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5196 handle_t *handle, 5197 struct ocfs2_xattr_bucket *first, 5198 u64 target_blk, 5199 u32 num_clusters) 5200 { 5201 int ret, credits; 5202 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5203 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5204 u64 end_blk; 5205 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5206 5207 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk, 5208 (unsigned long long)bucket_blkno(first), 5209 num_clusters, new_bucket); 5210 5211 /* The extent must have room for an additional bucket */ 5212 BUG_ON(new_bucket >= 5213 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5214 5215 /* end_blk points to the last existing bucket */ 5216 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5217 5218 /* 5219 * end_blk is the start of the last existing bucket. 5220 * Thus, (end_blk - target_blk) covers the target bucket and 5221 * every bucket after it up to, but not including, the last 5222 * existing bucket. Then we add the last existing bucket, the 5223 * new bucket, and the first bucket (3 * blk_per_bucket). 5224 */ 5225 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5226 ret = ocfs2_extend_trans(handle, credits); 5227 if (ret) { 5228 mlog_errno(ret); 5229 goto out; 5230 } 5231 5232 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5233 OCFS2_JOURNAL_ACCESS_WRITE); 5234 if (ret) { 5235 mlog_errno(ret); 5236 goto out; 5237 } 5238 5239 while (end_blk != target_blk) { 5240 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5241 end_blk + blk_per_bucket, 0); 5242 if (ret) 5243 goto out; 5244 end_blk -= blk_per_bucket; 5245 } 5246 5247 /* Move half of the xattr in target_blkno to the next bucket. */ 5248 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5249 target_blk + blk_per_bucket, NULL, 0); 5250 5251 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5252 ocfs2_xattr_bucket_journal_dirty(handle, first); 5253 5254 out: 5255 return ret; 5256 } 5257 5258 /* 5259 * Add new xattr bucket in an extent record and adjust the buckets 5260 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5261 * bucket we want to insert into. 5262 * 5263 * In the easy case, we will move all the buckets after target down by 5264 * one. Half of target's xattrs will be moved to the next bucket. 5265 * 5266 * If current cluster is full, we'll allocate a new one. This may not 5267 * be contiguous. The underlying calls will make sure that there is 5268 * space for the insert, shifting buckets around if necessary. 5269 * 'target' may be moved by those calls. 5270 */ 5271 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5272 struct buffer_head *xb_bh, 5273 struct ocfs2_xattr_bucket *target, 5274 struct ocfs2_xattr_set_ctxt *ctxt) 5275 { 5276 struct ocfs2_xattr_block *xb = 5277 (struct ocfs2_xattr_block *)xb_bh->b_data; 5278 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5279 struct ocfs2_extent_list *el = &xb_root->xt_list; 5280 u32 name_hash = 5281 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5282 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5283 int ret, num_buckets, extend = 1; 5284 u64 p_blkno; 5285 u32 e_cpos, num_clusters; 5286 /* The bucket at the front of the extent */ 5287 struct ocfs2_xattr_bucket *first; 5288 5289 trace_ocfs2_add_new_xattr_bucket( 5290 (unsigned long long)bucket_blkno(target)); 5291 5292 /* The first bucket of the original extent */ 5293 first = ocfs2_xattr_bucket_new(inode); 5294 if (!first) { 5295 ret = -ENOMEM; 5296 mlog_errno(ret); 5297 goto out; 5298 } 5299 5300 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5301 &num_clusters, el); 5302 if (ret) { 5303 mlog_errno(ret); 5304 goto out; 5305 } 5306 5307 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5308 if (ret) { 5309 mlog_errno(ret); 5310 goto out; 5311 } 5312 5313 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5314 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5315 /* 5316 * This can move first+target if the target bucket moves 5317 * to the new extent. 5318 */ 5319 ret = ocfs2_add_new_xattr_cluster(inode, 5320 xb_bh, 5321 first, 5322 target, 5323 &num_clusters, 5324 e_cpos, 5325 &extend, 5326 ctxt); 5327 if (ret) { 5328 mlog_errno(ret); 5329 goto out; 5330 } 5331 } 5332 5333 if (extend) { 5334 ret = ocfs2_extend_xattr_bucket(inode, 5335 ctxt->handle, 5336 first, 5337 bucket_blkno(target), 5338 num_clusters); 5339 if (ret) 5340 mlog_errno(ret); 5341 } 5342 5343 out: 5344 ocfs2_xattr_bucket_free(first); 5345 5346 return ret; 5347 } 5348 5349 /* 5350 * Truncate the specified xe_off entry in xattr bucket. 5351 * bucket is indicated by header_bh and len is the new length. 5352 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5353 * 5354 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 5355 */ 5356 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5357 struct ocfs2_xattr_bucket *bucket, 5358 int xe_off, 5359 int len, 5360 struct ocfs2_xattr_set_ctxt *ctxt) 5361 { 5362 int ret, offset; 5363 u64 value_blk; 5364 struct ocfs2_xattr_entry *xe; 5365 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5366 size_t blocksize = inode->i_sb->s_blocksize; 5367 struct ocfs2_xattr_value_buf vb = { 5368 .vb_access = ocfs2_journal_access, 5369 }; 5370 5371 xe = &xh->xh_entries[xe_off]; 5372 5373 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5374 5375 offset = le16_to_cpu(xe->xe_name_offset) + 5376 OCFS2_XATTR_SIZE(xe->xe_name_len); 5377 5378 value_blk = offset / blocksize; 5379 5380 /* We don't allow ocfs2_xattr_value to be stored in different block. */ 5381 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5382 5383 vb.vb_bh = bucket->bu_bhs[value_blk]; 5384 BUG_ON(!vb.vb_bh); 5385 5386 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5387 (vb.vb_bh->b_data + offset % blocksize); 5388 5389 /* 5390 * From here on out we have to dirty the bucket. The generic 5391 * value calls only modify one of the bucket's bhs, but we need 5392 * to send the bucket at once. So if they error, they *could* have 5393 * modified something. We have to assume they did, and dirty 5394 * the whole bucket. This leaves us in a consistent state. 5395 */ 5396 trace_ocfs2_xattr_bucket_value_truncate( 5397 (unsigned long long)bucket_blkno(bucket), xe_off, len); 5398 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5399 if (ret) { 5400 mlog_errno(ret); 5401 goto out; 5402 } 5403 5404 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5405 OCFS2_JOURNAL_ACCESS_WRITE); 5406 if (ret) { 5407 mlog_errno(ret); 5408 goto out; 5409 } 5410 5411 xe->xe_value_size = cpu_to_le64(len); 5412 5413 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5414 5415 out: 5416 return ret; 5417 } 5418 5419 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5420 struct buffer_head *root_bh, 5421 u64 blkno, 5422 u32 cpos, 5423 u32 len, 5424 void *para) 5425 { 5426 int ret; 5427 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5428 struct inode *tl_inode = osb->osb_tl_inode; 5429 handle_t *handle; 5430 struct ocfs2_xattr_block *xb = 5431 (struct ocfs2_xattr_block *)root_bh->b_data; 5432 struct ocfs2_alloc_context *meta_ac = NULL; 5433 struct ocfs2_cached_dealloc_ctxt dealloc; 5434 struct ocfs2_extent_tree et; 5435 5436 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5437 ocfs2_delete_xattr_in_bucket, para); 5438 if (ret) { 5439 mlog_errno(ret); 5440 return ret; 5441 } 5442 5443 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5444 5445 ocfs2_init_dealloc_ctxt(&dealloc); 5446 5447 trace_ocfs2_rm_xattr_cluster( 5448 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5449 (unsigned long long)blkno, cpos, len); 5450 5451 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5452 len); 5453 5454 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5455 if (ret) { 5456 mlog_errno(ret); 5457 return ret; 5458 } 5459 5460 inode_lock(tl_inode); 5461 5462 if (ocfs2_truncate_log_needs_flush(osb)) { 5463 ret = __ocfs2_flush_truncate_log(osb); 5464 if (ret < 0) { 5465 mlog_errno(ret); 5466 goto out; 5467 } 5468 } 5469 5470 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5471 if (IS_ERR(handle)) { 5472 ret = -ENOMEM; 5473 mlog_errno(ret); 5474 goto out; 5475 } 5476 5477 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5478 OCFS2_JOURNAL_ACCESS_WRITE); 5479 if (ret) { 5480 mlog_errno(ret); 5481 goto out_commit; 5482 } 5483 5484 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5485 &dealloc); 5486 if (ret) { 5487 mlog_errno(ret); 5488 goto out_commit; 5489 } 5490 5491 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5492 ocfs2_journal_dirty(handle, root_bh); 5493 5494 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5495 if (ret) 5496 mlog_errno(ret); 5497 ocfs2_update_inode_fsync_trans(handle, inode, 0); 5498 5499 out_commit: 5500 ocfs2_commit_trans(osb, handle); 5501 out: 5502 ocfs2_schedule_truncate_log_flush(osb, 1); 5503 5504 inode_unlock(tl_inode); 5505 5506 if (meta_ac) 5507 ocfs2_free_alloc_context(meta_ac); 5508 5509 ocfs2_run_deallocs(osb, &dealloc); 5510 5511 return ret; 5512 } 5513 5514 /* 5515 * check whether the xattr bucket is filled up with the same hash value. 5516 * If we want to insert the xattr with the same hash, return -ENOSPC. 5517 * If we want to insert a xattr with different hash value, go ahead 5518 * and ocfs2_divide_xattr_bucket will handle this. 5519 */ 5520 static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 5521 struct ocfs2_xattr_bucket *bucket, 5522 const char *name) 5523 { 5524 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5525 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5526 5527 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5528 return 0; 5529 5530 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 5531 xh->xh_entries[0].xe_name_hash) { 5532 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5533 "hash = %u\n", 5534 (unsigned long long)bucket_blkno(bucket), 5535 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5536 return -ENOSPC; 5537 } 5538 5539 return 0; 5540 } 5541 5542 /* 5543 * Try to set the entry in the current bucket. If we fail, the caller 5544 * will handle getting us another bucket. 5545 */ 5546 static int ocfs2_xattr_set_entry_bucket(struct inode *inode, 5547 struct ocfs2_xattr_info *xi, 5548 struct ocfs2_xattr_search *xs, 5549 struct ocfs2_xattr_set_ctxt *ctxt) 5550 { 5551 int ret; 5552 struct ocfs2_xa_loc loc; 5553 5554 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name); 5555 5556 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5557 xs->not_found ? NULL : xs->here); 5558 ret = ocfs2_xa_set(&loc, xi, ctxt); 5559 if (!ret) { 5560 xs->here = loc.xl_entry; 5561 goto out; 5562 } 5563 if (ret != -ENOSPC) { 5564 mlog_errno(ret); 5565 goto out; 5566 } 5567 5568 /* Ok, we need space. Let's try defragmenting the bucket. */ 5569 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5570 xs->bucket); 5571 if (ret) { 5572 mlog_errno(ret); 5573 goto out; 5574 } 5575 5576 ret = ocfs2_xa_set(&loc, xi, ctxt); 5577 if (!ret) { 5578 xs->here = loc.xl_entry; 5579 goto out; 5580 } 5581 if (ret != -ENOSPC) 5582 mlog_errno(ret); 5583 5584 5585 out: 5586 return ret; 5587 } 5588 5589 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5590 struct ocfs2_xattr_info *xi, 5591 struct ocfs2_xattr_search *xs, 5592 struct ocfs2_xattr_set_ctxt *ctxt) 5593 { 5594 int ret; 5595 5596 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name); 5597 5598 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5599 if (!ret) 5600 goto out; 5601 if (ret != -ENOSPC) { 5602 mlog_errno(ret); 5603 goto out; 5604 } 5605 5606 /* Ack, need more space. Let's try to get another bucket! */ 5607 5608 /* 5609 * We do not allow for overlapping ranges between buckets. And 5610 * the maximum number of collisions we will allow for then is 5611 * one bucket's worth, so check it here whether we need to 5612 * add a new bucket for the insert. 5613 */ 5614 ret = ocfs2_check_xattr_bucket_collision(inode, 5615 xs->bucket, 5616 xi->xi_name); 5617 if (ret) { 5618 mlog_errno(ret); 5619 goto out; 5620 } 5621 5622 ret = ocfs2_add_new_xattr_bucket(inode, 5623 xs->xattr_bh, 5624 xs->bucket, 5625 ctxt); 5626 if (ret) { 5627 mlog_errno(ret); 5628 goto out; 5629 } 5630 5631 /* 5632 * ocfs2_add_new_xattr_bucket() will have updated 5633 * xs->bucket if it moved, but it will not have updated 5634 * any of the other search fields. Thus, we drop it and 5635 * re-search. Everything should be cached, so it'll be 5636 * quick. 5637 */ 5638 ocfs2_xattr_bucket_relse(xs->bucket); 5639 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5640 xi->xi_name_index, 5641 xi->xi_name, xs); 5642 if (ret && ret != -ENODATA) 5643 goto out; 5644 xs->not_found = ret; 5645 5646 /* Ok, we have a new bucket, let's try again */ 5647 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5648 if (ret && (ret != -ENOSPC)) 5649 mlog_errno(ret); 5650 5651 out: 5652 return ret; 5653 } 5654 5655 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5656 struct ocfs2_xattr_bucket *bucket, 5657 void *para) 5658 { 5659 int ret = 0, ref_credits; 5660 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5661 u16 i; 5662 struct ocfs2_xattr_entry *xe; 5663 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5664 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5665 int credits = ocfs2_remove_extent_credits(osb->sb) + 5666 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5667 struct ocfs2_xattr_value_root *xv; 5668 struct ocfs2_rm_xattr_bucket_para *args = 5669 (struct ocfs2_rm_xattr_bucket_para *)para; 5670 5671 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5672 5673 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5674 xe = &xh->xh_entries[i]; 5675 if (ocfs2_xattr_is_local(xe)) 5676 continue; 5677 5678 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5679 i, &xv, NULL); 5680 if (ret) { 5681 mlog_errno(ret); 5682 break; 5683 } 5684 5685 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5686 args->ref_ci, 5687 args->ref_root_bh, 5688 &ctxt.meta_ac, 5689 &ref_credits); 5690 5691 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5692 if (IS_ERR(ctxt.handle)) { 5693 ret = PTR_ERR(ctxt.handle); 5694 mlog_errno(ret); 5695 break; 5696 } 5697 5698 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5699 i, 0, &ctxt); 5700 5701 ocfs2_commit_trans(osb, ctxt.handle); 5702 if (ctxt.meta_ac) { 5703 ocfs2_free_alloc_context(ctxt.meta_ac); 5704 ctxt.meta_ac = NULL; 5705 } 5706 if (ret) { 5707 mlog_errno(ret); 5708 break; 5709 } 5710 } 5711 5712 if (ctxt.meta_ac) 5713 ocfs2_free_alloc_context(ctxt.meta_ac); 5714 ocfs2_schedule_truncate_log_flush(osb, 1); 5715 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5716 return ret; 5717 } 5718 5719 /* 5720 * Whenever we modify a xattr value root in the bucket(e.g, CoW 5721 * or change the extent record flag), we need to recalculate 5722 * the metaecc for the whole bucket. So it is done here. 5723 * 5724 * Note: 5725 * We have to give the extra credits for the caller. 5726 */ 5727 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, 5728 handle_t *handle, 5729 void *para) 5730 { 5731 int ret; 5732 struct ocfs2_xattr_bucket *bucket = 5733 (struct ocfs2_xattr_bucket *)para; 5734 5735 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 5736 OCFS2_JOURNAL_ACCESS_WRITE); 5737 if (ret) { 5738 mlog_errno(ret); 5739 return ret; 5740 } 5741 5742 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 5743 5744 return 0; 5745 } 5746 5747 /* 5748 * Special action we need if the xattr value is refcounted. 5749 * 5750 * 1. If the xattr is refcounted, lock the tree. 5751 * 2. CoW the xattr if we are setting the new value and the value 5752 * will be stored outside. 5753 * 3. In other case, decrease_refcount will work for us, so just 5754 * lock the refcount tree, calculate the meta and credits is OK. 5755 * 5756 * We have to do CoW before ocfs2_init_xattr_set_ctxt since 5757 * currently CoW is a completed transaction, while this function 5758 * will also lock the allocators and let us deadlock. So we will 5759 * CoW the whole xattr value. 5760 */ 5761 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5762 struct ocfs2_dinode *di, 5763 struct ocfs2_xattr_info *xi, 5764 struct ocfs2_xattr_search *xis, 5765 struct ocfs2_xattr_search *xbs, 5766 struct ocfs2_refcount_tree **ref_tree, 5767 int *meta_add, 5768 int *credits) 5769 { 5770 int ret = 0; 5771 struct ocfs2_xattr_block *xb; 5772 struct ocfs2_xattr_entry *xe; 5773 char *base; 5774 u32 p_cluster, num_clusters; 5775 unsigned int ext_flags; 5776 int name_offset, name_len; 5777 struct ocfs2_xattr_value_buf vb; 5778 struct ocfs2_xattr_bucket *bucket = NULL; 5779 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5780 struct ocfs2_post_refcount refcount; 5781 struct ocfs2_post_refcount *p = NULL; 5782 struct buffer_head *ref_root_bh = NULL; 5783 5784 if (!xis->not_found) { 5785 xe = xis->here; 5786 name_offset = le16_to_cpu(xe->xe_name_offset); 5787 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5788 base = xis->base; 5789 vb.vb_bh = xis->inode_bh; 5790 vb.vb_access = ocfs2_journal_access_di; 5791 } else { 5792 int i, block_off = 0; 5793 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5794 xe = xbs->here; 5795 name_offset = le16_to_cpu(xe->xe_name_offset); 5796 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5797 i = xbs->here - xbs->header->xh_entries; 5798 5799 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5800 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5801 bucket_xh(xbs->bucket), 5802 i, &block_off, 5803 &name_offset); 5804 if (ret) { 5805 mlog_errno(ret); 5806 goto out; 5807 } 5808 base = bucket_block(xbs->bucket, block_off); 5809 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5810 vb.vb_access = ocfs2_journal_access; 5811 5812 if (ocfs2_meta_ecc(osb)) { 5813 /*create parameters for ocfs2_post_refcount. */ 5814 bucket = xbs->bucket; 5815 refcount.credits = bucket->bu_blocks; 5816 refcount.para = bucket; 5817 refcount.func = 5818 ocfs2_xattr_bucket_post_refcount; 5819 p = &refcount; 5820 } 5821 } else { 5822 base = xbs->base; 5823 vb.vb_bh = xbs->xattr_bh; 5824 vb.vb_access = ocfs2_journal_access_xb; 5825 } 5826 } 5827 5828 if (ocfs2_xattr_is_local(xe)) 5829 goto out; 5830 5831 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5832 (base + name_offset + name_len); 5833 5834 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5835 &num_clusters, &vb.vb_xv->xr_list, 5836 &ext_flags); 5837 if (ret) { 5838 mlog_errno(ret); 5839 goto out; 5840 } 5841 5842 /* 5843 * We just need to check the 1st extent record, since we always 5844 * CoW the whole xattr. So there shouldn't be a xattr with 5845 * some REFCOUNT extent recs after the 1st one. 5846 */ 5847 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5848 goto out; 5849 5850 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5851 1, ref_tree, &ref_root_bh); 5852 if (ret) { 5853 mlog_errno(ret); 5854 goto out; 5855 } 5856 5857 /* 5858 * If we are deleting the xattr or the new size will be stored inside, 5859 * cool, leave it there, the xattr truncate process will remove them 5860 * for us(it still needs the refcount tree lock and the meta, credits). 5861 * And the worse case is that every cluster truncate will split the 5862 * refcount tree, and make the original extent become 3. So we will need 5863 * 2 * cluster more extent recs at most. 5864 */ 5865 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5866 5867 ret = ocfs2_refcounted_xattr_delete_need(inode, 5868 &(*ref_tree)->rf_ci, 5869 ref_root_bh, vb.vb_xv, 5870 meta_add, credits); 5871 if (ret) 5872 mlog_errno(ret); 5873 goto out; 5874 } 5875 5876 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5877 *ref_tree, ref_root_bh, 0, 5878 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5879 if (ret) 5880 mlog_errno(ret); 5881 5882 out: 5883 brelse(ref_root_bh); 5884 return ret; 5885 } 5886 5887 /* 5888 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5889 * The physical clusters will be added to refcount tree. 5890 */ 5891 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5892 struct ocfs2_xattr_value_root *xv, 5893 struct ocfs2_extent_tree *value_et, 5894 struct ocfs2_caching_info *ref_ci, 5895 struct buffer_head *ref_root_bh, 5896 struct ocfs2_cached_dealloc_ctxt *dealloc, 5897 struct ocfs2_post_refcount *refcount) 5898 { 5899 int ret = 0; 5900 u32 clusters = le32_to_cpu(xv->xr_clusters); 5901 u32 cpos, p_cluster, num_clusters; 5902 struct ocfs2_extent_list *el = &xv->xr_list; 5903 unsigned int ext_flags; 5904 5905 cpos = 0; 5906 while (cpos < clusters) { 5907 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5908 &num_clusters, el, &ext_flags); 5909 if (ret) { 5910 mlog_errno(ret); 5911 break; 5912 } 5913 5914 cpos += num_clusters; 5915 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5916 continue; 5917 5918 BUG_ON(!p_cluster); 5919 5920 ret = ocfs2_add_refcount_flag(inode, value_et, 5921 ref_ci, ref_root_bh, 5922 cpos - num_clusters, 5923 p_cluster, num_clusters, 5924 dealloc, refcount); 5925 if (ret) { 5926 mlog_errno(ret); 5927 break; 5928 } 5929 } 5930 5931 return ret; 5932 } 5933 5934 /* 5935 * Given a normal ocfs2_xattr_header, refcount all the entries which 5936 * have value stored outside. 5937 * Used for xattrs stored in inode and ocfs2_xattr_block. 5938 */ 5939 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5940 struct ocfs2_xattr_value_buf *vb, 5941 struct ocfs2_xattr_header *header, 5942 struct ocfs2_caching_info *ref_ci, 5943 struct buffer_head *ref_root_bh, 5944 struct ocfs2_cached_dealloc_ctxt *dealloc) 5945 { 5946 5947 struct ocfs2_xattr_entry *xe; 5948 struct ocfs2_xattr_value_root *xv; 5949 struct ocfs2_extent_tree et; 5950 int i, ret = 0; 5951 5952 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5953 xe = &header->xh_entries[i]; 5954 5955 if (ocfs2_xattr_is_local(xe)) 5956 continue; 5957 5958 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5959 le16_to_cpu(xe->xe_name_offset) + 5960 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5961 5962 vb->vb_xv = xv; 5963 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5964 5965 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 5966 ref_ci, ref_root_bh, 5967 dealloc, NULL); 5968 if (ret) { 5969 mlog_errno(ret); 5970 break; 5971 } 5972 } 5973 5974 return ret; 5975 } 5976 5977 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 5978 struct buffer_head *fe_bh, 5979 struct ocfs2_caching_info *ref_ci, 5980 struct buffer_head *ref_root_bh, 5981 struct ocfs2_cached_dealloc_ctxt *dealloc) 5982 { 5983 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 5984 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) 5985 (fe_bh->b_data + inode->i_sb->s_blocksize - 5986 le16_to_cpu(di->i_xattr_inline_size)); 5987 struct ocfs2_xattr_value_buf vb = { 5988 .vb_bh = fe_bh, 5989 .vb_access = ocfs2_journal_access_di, 5990 }; 5991 5992 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 5993 ref_ci, ref_root_bh, dealloc); 5994 } 5995 5996 struct ocfs2_xattr_tree_value_refcount_para { 5997 struct ocfs2_caching_info *ref_ci; 5998 struct buffer_head *ref_root_bh; 5999 struct ocfs2_cached_dealloc_ctxt *dealloc; 6000 }; 6001 6002 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 6003 struct ocfs2_xattr_bucket *bucket, 6004 int offset, 6005 struct ocfs2_xattr_value_root **xv, 6006 struct buffer_head **bh) 6007 { 6008 int ret, block_off, name_offset; 6009 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 6010 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6011 void *base; 6012 6013 ret = ocfs2_xattr_bucket_get_name_value(sb, 6014 bucket_xh(bucket), 6015 offset, 6016 &block_off, 6017 &name_offset); 6018 if (ret) { 6019 mlog_errno(ret); 6020 goto out; 6021 } 6022 6023 base = bucket_block(bucket, block_off); 6024 6025 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6026 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6027 6028 if (bh) 6029 *bh = bucket->bu_bhs[block_off]; 6030 out: 6031 return ret; 6032 } 6033 6034 /* 6035 * For a given xattr bucket, refcount all the entries which 6036 * have value stored outside. 6037 */ 6038 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6039 struct ocfs2_xattr_bucket *bucket, 6040 void *para) 6041 { 6042 int i, ret = 0; 6043 struct ocfs2_extent_tree et; 6044 struct ocfs2_xattr_tree_value_refcount_para *ref = 6045 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6046 struct ocfs2_xattr_header *xh = 6047 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6048 struct ocfs2_xattr_entry *xe; 6049 struct ocfs2_xattr_value_buf vb = { 6050 .vb_access = ocfs2_journal_access, 6051 }; 6052 struct ocfs2_post_refcount refcount = { 6053 .credits = bucket->bu_blocks, 6054 .para = bucket, 6055 .func = ocfs2_xattr_bucket_post_refcount, 6056 }; 6057 struct ocfs2_post_refcount *p = NULL; 6058 6059 /* We only need post_refcount if we support metaecc. */ 6060 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6061 p = &refcount; 6062 6063 trace_ocfs2_xattr_bucket_value_refcount( 6064 (unsigned long long)bucket_blkno(bucket), 6065 le16_to_cpu(xh->xh_count)); 6066 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6067 xe = &xh->xh_entries[i]; 6068 6069 if (ocfs2_xattr_is_local(xe)) 6070 continue; 6071 6072 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6073 &vb.vb_xv, &vb.vb_bh); 6074 if (ret) { 6075 mlog_errno(ret); 6076 break; 6077 } 6078 6079 ocfs2_init_xattr_value_extent_tree(&et, 6080 INODE_CACHE(inode), &vb); 6081 6082 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6083 &et, ref->ref_ci, 6084 ref->ref_root_bh, 6085 ref->dealloc, p); 6086 if (ret) { 6087 mlog_errno(ret); 6088 break; 6089 } 6090 } 6091 6092 return ret; 6093 6094 } 6095 6096 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6097 struct buffer_head *root_bh, 6098 u64 blkno, u32 cpos, u32 len, void *para) 6099 { 6100 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6101 ocfs2_xattr_bucket_value_refcount, 6102 para); 6103 } 6104 6105 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6106 struct buffer_head *blk_bh, 6107 struct ocfs2_caching_info *ref_ci, 6108 struct buffer_head *ref_root_bh, 6109 struct ocfs2_cached_dealloc_ctxt *dealloc) 6110 { 6111 int ret = 0; 6112 struct ocfs2_xattr_block *xb = 6113 (struct ocfs2_xattr_block *)blk_bh->b_data; 6114 6115 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6116 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6117 struct ocfs2_xattr_value_buf vb = { 6118 .vb_bh = blk_bh, 6119 .vb_access = ocfs2_journal_access_xb, 6120 }; 6121 6122 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6123 ref_ci, ref_root_bh, 6124 dealloc); 6125 } else { 6126 struct ocfs2_xattr_tree_value_refcount_para para = { 6127 .ref_ci = ref_ci, 6128 .ref_root_bh = ref_root_bh, 6129 .dealloc = dealloc, 6130 }; 6131 6132 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 6133 ocfs2_refcount_xattr_tree_rec, 6134 ¶); 6135 } 6136 6137 return ret; 6138 } 6139 6140 int ocfs2_xattr_attach_refcount_tree(struct inode *inode, 6141 struct buffer_head *fe_bh, 6142 struct ocfs2_caching_info *ref_ci, 6143 struct buffer_head *ref_root_bh, 6144 struct ocfs2_cached_dealloc_ctxt *dealloc) 6145 { 6146 int ret = 0; 6147 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6148 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6149 struct buffer_head *blk_bh = NULL; 6150 6151 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 6152 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, 6153 ref_ci, ref_root_bh, 6154 dealloc); 6155 if (ret) { 6156 mlog_errno(ret); 6157 goto out; 6158 } 6159 } 6160 6161 if (!di->i_xattr_loc) 6162 goto out; 6163 6164 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 6165 &blk_bh); 6166 if (ret < 0) { 6167 mlog_errno(ret); 6168 goto out; 6169 } 6170 6171 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, 6172 ref_root_bh, dealloc); 6173 if (ret) 6174 mlog_errno(ret); 6175 6176 brelse(blk_bh); 6177 out: 6178 6179 return ret; 6180 } 6181 6182 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); 6183 /* 6184 * Store the information we need in xattr reflink. 6185 * old_bh and new_bh are inode bh for the old and new inode. 6186 */ 6187 struct ocfs2_xattr_reflink { 6188 struct inode *old_inode; 6189 struct inode *new_inode; 6190 struct buffer_head *old_bh; 6191 struct buffer_head *new_bh; 6192 struct ocfs2_caching_info *ref_ci; 6193 struct buffer_head *ref_root_bh; 6194 struct ocfs2_cached_dealloc_ctxt *dealloc; 6195 should_xattr_reflinked *xattr_reflinked; 6196 }; 6197 6198 /* 6199 * Given a xattr header and xe offset, 6200 * return the proper xv and the corresponding bh. 6201 * xattr in inode, block and xattr tree have different implementaions. 6202 */ 6203 typedef int (get_xattr_value_root)(struct super_block *sb, 6204 struct buffer_head *bh, 6205 struct ocfs2_xattr_header *xh, 6206 int offset, 6207 struct ocfs2_xattr_value_root **xv, 6208 struct buffer_head **ret_bh, 6209 void *para); 6210 6211 /* 6212 * Calculate all the xattr value root metadata stored in this xattr header and 6213 * credits we need if we create them from the scratch. 6214 * We use get_xattr_value_root so that all types of xattr container can use it. 6215 */ 6216 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, 6217 struct buffer_head *bh, 6218 struct ocfs2_xattr_header *xh, 6219 int *metas, int *credits, 6220 int *num_recs, 6221 get_xattr_value_root *func, 6222 void *para) 6223 { 6224 int i, ret = 0; 6225 struct ocfs2_xattr_value_root *xv; 6226 struct ocfs2_xattr_entry *xe; 6227 6228 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6229 xe = &xh->xh_entries[i]; 6230 if (ocfs2_xattr_is_local(xe)) 6231 continue; 6232 6233 ret = func(sb, bh, xh, i, &xv, NULL, para); 6234 if (ret) { 6235 mlog_errno(ret); 6236 break; 6237 } 6238 6239 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * 6240 le16_to_cpu(xv->xr_list.l_next_free_rec); 6241 6242 *credits += ocfs2_calc_extend_credits(sb, 6243 &def_xv.xv.xr_list); 6244 6245 /* 6246 * If the value is a tree with depth > 1, We don't go deep 6247 * to the extent block, so just calculate a maximum record num. 6248 */ 6249 if (!xv->xr_list.l_tree_depth) 6250 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); 6251 else 6252 *num_recs += ocfs2_clusters_for_bytes(sb, 6253 XATTR_SIZE_MAX); 6254 } 6255 6256 return ret; 6257 } 6258 6259 /* Used by xattr inode and block to return the right xv and buffer_head. */ 6260 static int ocfs2_get_xattr_value_root(struct super_block *sb, 6261 struct buffer_head *bh, 6262 struct ocfs2_xattr_header *xh, 6263 int offset, 6264 struct ocfs2_xattr_value_root **xv, 6265 struct buffer_head **ret_bh, 6266 void *para) 6267 { 6268 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6269 6270 *xv = (struct ocfs2_xattr_value_root *)((void *)xh + 6271 le16_to_cpu(xe->xe_name_offset) + 6272 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6273 6274 if (ret_bh) 6275 *ret_bh = bh; 6276 6277 return 0; 6278 } 6279 6280 /* 6281 * Lock the meta_ac and caculate how much credits we need for reflink xattrs. 6282 * It is only used for inline xattr and xattr block. 6283 */ 6284 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, 6285 struct ocfs2_xattr_header *xh, 6286 struct buffer_head *ref_root_bh, 6287 int *credits, 6288 struct ocfs2_alloc_context **meta_ac) 6289 { 6290 int ret, meta_add = 0, num_recs = 0; 6291 struct ocfs2_refcount_block *rb = 6292 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 6293 6294 *credits = 0; 6295 6296 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh, 6297 &meta_add, credits, &num_recs, 6298 ocfs2_get_xattr_value_root, 6299 NULL); 6300 if (ret) { 6301 mlog_errno(ret); 6302 goto out; 6303 } 6304 6305 /* 6306 * We need to add/modify num_recs in refcount tree, so just calculate 6307 * an approximate number we need for refcount tree change. 6308 * Sometimes we need to split the tree, and after split, half recs 6309 * will be moved to the new block, and a new block can only provide 6310 * half number of recs. So we multiple new blocks by 2. 6311 */ 6312 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6313 meta_add += num_recs; 6314 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6315 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6316 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6317 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6318 else 6319 *credits += 1; 6320 6321 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); 6322 if (ret) 6323 mlog_errno(ret); 6324 6325 out: 6326 return ret; 6327 } 6328 6329 /* 6330 * Given a xattr header, reflink all the xattrs in this container. 6331 * It can be used for inode, block and bucket. 6332 * 6333 * NOTE: 6334 * Before we call this function, the caller has memcpy the xattr in 6335 * old_xh to the new_xh. 6336 * 6337 * If args.xattr_reflinked is set, call it to decide whether the xe should 6338 * be reflinked or not. If not, remove it from the new xattr header. 6339 */ 6340 static int ocfs2_reflink_xattr_header(handle_t *handle, 6341 struct ocfs2_xattr_reflink *args, 6342 struct buffer_head *old_bh, 6343 struct ocfs2_xattr_header *xh, 6344 struct buffer_head *new_bh, 6345 struct ocfs2_xattr_header *new_xh, 6346 struct ocfs2_xattr_value_buf *vb, 6347 struct ocfs2_alloc_context *meta_ac, 6348 get_xattr_value_root *func, 6349 void *para) 6350 { 6351 int ret = 0, i, j; 6352 struct super_block *sb = args->old_inode->i_sb; 6353 struct buffer_head *value_bh; 6354 struct ocfs2_xattr_entry *xe, *last; 6355 struct ocfs2_xattr_value_root *xv, *new_xv; 6356 struct ocfs2_extent_tree data_et; 6357 u32 clusters, cpos, p_cluster, num_clusters; 6358 unsigned int ext_flags = 0; 6359 6360 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr, 6361 le16_to_cpu(xh->xh_count)); 6362 6363 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; 6364 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6365 xe = &xh->xh_entries[i]; 6366 6367 if (args->xattr_reflinked && !args->xattr_reflinked(xe)) { 6368 xe = &new_xh->xh_entries[j]; 6369 6370 le16_add_cpu(&new_xh->xh_count, -1); 6371 if (new_xh->xh_count) { 6372 memmove(xe, xe + 1, 6373 (void *)last - (void *)xe); 6374 memset(last, 0, 6375 sizeof(struct ocfs2_xattr_entry)); 6376 } 6377 6378 /* 6379 * We don't want j to increase in the next round since 6380 * it is already moved ahead. 6381 */ 6382 j--; 6383 continue; 6384 } 6385 6386 if (ocfs2_xattr_is_local(xe)) 6387 continue; 6388 6389 ret = func(sb, old_bh, xh, i, &xv, NULL, para); 6390 if (ret) { 6391 mlog_errno(ret); 6392 break; 6393 } 6394 6395 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); 6396 if (ret) { 6397 mlog_errno(ret); 6398 break; 6399 } 6400 6401 /* 6402 * For the xattr which has l_tree_depth = 0, all the extent 6403 * recs have already be copied to the new xh with the 6404 * propriate OCFS2_EXT_REFCOUNTED flag we just need to 6405 * increase the refount count int the refcount tree. 6406 * 6407 * For the xattr which has l_tree_depth > 0, we need 6408 * to initialize it to the empty default value root, 6409 * and then insert the extents one by one. 6410 */ 6411 if (xv->xr_list.l_tree_depth) { 6412 memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE); 6413 vb->vb_xv = new_xv; 6414 vb->vb_bh = value_bh; 6415 ocfs2_init_xattr_value_extent_tree(&data_et, 6416 INODE_CACHE(args->new_inode), vb); 6417 } 6418 6419 clusters = le32_to_cpu(xv->xr_clusters); 6420 cpos = 0; 6421 while (cpos < clusters) { 6422 ret = ocfs2_xattr_get_clusters(args->old_inode, 6423 cpos, 6424 &p_cluster, 6425 &num_clusters, 6426 &xv->xr_list, 6427 &ext_flags); 6428 if (ret) { 6429 mlog_errno(ret); 6430 goto out; 6431 } 6432 6433 BUG_ON(!p_cluster); 6434 6435 if (xv->xr_list.l_tree_depth) { 6436 ret = ocfs2_insert_extent(handle, 6437 &data_et, cpos, 6438 ocfs2_clusters_to_blocks( 6439 args->old_inode->i_sb, 6440 p_cluster), 6441 num_clusters, ext_flags, 6442 meta_ac); 6443 if (ret) { 6444 mlog_errno(ret); 6445 goto out; 6446 } 6447 } 6448 6449 ret = ocfs2_increase_refcount(handle, args->ref_ci, 6450 args->ref_root_bh, 6451 p_cluster, num_clusters, 6452 meta_ac, args->dealloc); 6453 if (ret) { 6454 mlog_errno(ret); 6455 goto out; 6456 } 6457 6458 cpos += num_clusters; 6459 } 6460 } 6461 6462 out: 6463 return ret; 6464 } 6465 6466 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) 6467 { 6468 int ret = 0, credits = 0; 6469 handle_t *handle; 6470 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); 6471 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; 6472 int inline_size = le16_to_cpu(di->i_xattr_inline_size); 6473 int header_off = osb->sb->s_blocksize - inline_size; 6474 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *) 6475 (args->old_bh->b_data + header_off); 6476 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *) 6477 (args->new_bh->b_data + header_off); 6478 struct ocfs2_alloc_context *meta_ac = NULL; 6479 struct ocfs2_inode_info *new_oi; 6480 struct ocfs2_dinode *new_di; 6481 struct ocfs2_xattr_value_buf vb = { 6482 .vb_bh = args->new_bh, 6483 .vb_access = ocfs2_journal_access_di, 6484 }; 6485 6486 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6487 &credits, &meta_ac); 6488 if (ret) { 6489 mlog_errno(ret); 6490 goto out; 6491 } 6492 6493 handle = ocfs2_start_trans(osb, credits); 6494 if (IS_ERR(handle)) { 6495 ret = PTR_ERR(handle); 6496 mlog_errno(ret); 6497 goto out; 6498 } 6499 6500 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), 6501 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6502 if (ret) { 6503 mlog_errno(ret); 6504 goto out_commit; 6505 } 6506 6507 memcpy(args->new_bh->b_data + header_off, 6508 args->old_bh->b_data + header_off, inline_size); 6509 6510 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6511 new_di->i_xattr_inline_size = cpu_to_le16(inline_size); 6512 6513 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, 6514 args->new_bh, new_xh, &vb, meta_ac, 6515 ocfs2_get_xattr_value_root, NULL); 6516 if (ret) { 6517 mlog_errno(ret); 6518 goto out_commit; 6519 } 6520 6521 new_oi = OCFS2_I(args->new_inode); 6522 6523 spin_lock(&new_oi->ip_lock); 6524 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; 6525 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6526 spin_unlock(&new_oi->ip_lock); 6527 6528 ocfs2_journal_dirty(handle, args->new_bh); 6529 6530 out_commit: 6531 ocfs2_commit_trans(osb, handle); 6532 6533 out: 6534 if (meta_ac) 6535 ocfs2_free_alloc_context(meta_ac); 6536 return ret; 6537 } 6538 6539 static int ocfs2_create_empty_xattr_block(struct inode *inode, 6540 struct buffer_head *fe_bh, 6541 struct buffer_head **ret_bh, 6542 int indexed) 6543 { 6544 int ret; 6545 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6546 struct ocfs2_xattr_set_ctxt ctxt; 6547 6548 memset(&ctxt, 0, sizeof(ctxt)); 6549 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); 6550 if (ret < 0) { 6551 mlog_errno(ret); 6552 return ret; 6553 } 6554 6555 ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); 6556 if (IS_ERR(ctxt.handle)) { 6557 ret = PTR_ERR(ctxt.handle); 6558 mlog_errno(ret); 6559 goto out; 6560 } 6561 6562 trace_ocfs2_create_empty_xattr_block( 6563 (unsigned long long)fe_bh->b_blocknr, indexed); 6564 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6565 ret_bh); 6566 if (ret) 6567 mlog_errno(ret); 6568 6569 ocfs2_commit_trans(osb, ctxt.handle); 6570 out: 6571 ocfs2_free_alloc_context(ctxt.meta_ac); 6572 return ret; 6573 } 6574 6575 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args, 6576 struct buffer_head *blk_bh, 6577 struct buffer_head *new_blk_bh) 6578 { 6579 int ret = 0, credits = 0; 6580 handle_t *handle; 6581 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode); 6582 struct ocfs2_dinode *new_di; 6583 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb); 6584 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 6585 struct ocfs2_xattr_block *xb = 6586 (struct ocfs2_xattr_block *)blk_bh->b_data; 6587 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header; 6588 struct ocfs2_xattr_block *new_xb = 6589 (struct ocfs2_xattr_block *)new_blk_bh->b_data; 6590 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header; 6591 struct ocfs2_alloc_context *meta_ac; 6592 struct ocfs2_xattr_value_buf vb = { 6593 .vb_bh = new_blk_bh, 6594 .vb_access = ocfs2_journal_access_xb, 6595 }; 6596 6597 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6598 &credits, &meta_ac); 6599 if (ret) { 6600 mlog_errno(ret); 6601 return ret; 6602 } 6603 6604 /* One more credits in case we need to add xattr flags in new inode. */ 6605 handle = ocfs2_start_trans(osb, credits + 1); 6606 if (IS_ERR(handle)) { 6607 ret = PTR_ERR(handle); 6608 mlog_errno(ret); 6609 goto out; 6610 } 6611 6612 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6613 ret = ocfs2_journal_access_di(handle, 6614 INODE_CACHE(args->new_inode), 6615 args->new_bh, 6616 OCFS2_JOURNAL_ACCESS_WRITE); 6617 if (ret) { 6618 mlog_errno(ret); 6619 goto out_commit; 6620 } 6621 } 6622 6623 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode), 6624 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6625 if (ret) { 6626 mlog_errno(ret); 6627 goto out_commit; 6628 } 6629 6630 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off, 6631 osb->sb->s_blocksize - header_off); 6632 6633 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh, 6634 new_blk_bh, new_xh, &vb, meta_ac, 6635 ocfs2_get_xattr_value_root, NULL); 6636 if (ret) { 6637 mlog_errno(ret); 6638 goto out_commit; 6639 } 6640 6641 ocfs2_journal_dirty(handle, new_blk_bh); 6642 6643 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6644 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6645 spin_lock(&new_oi->ip_lock); 6646 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 6647 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6648 spin_unlock(&new_oi->ip_lock); 6649 6650 ocfs2_journal_dirty(handle, args->new_bh); 6651 } 6652 6653 out_commit: 6654 ocfs2_commit_trans(osb, handle); 6655 6656 out: 6657 ocfs2_free_alloc_context(meta_ac); 6658 return ret; 6659 } 6660 6661 struct ocfs2_reflink_xattr_tree_args { 6662 struct ocfs2_xattr_reflink *reflink; 6663 struct buffer_head *old_blk_bh; 6664 struct buffer_head *new_blk_bh; 6665 struct ocfs2_xattr_bucket *old_bucket; 6666 struct ocfs2_xattr_bucket *new_bucket; 6667 }; 6668 6669 /* 6670 * NOTE: 6671 * We have to handle the case that both old bucket and new bucket 6672 * will call this function to get the right ret_bh. 6673 * So The caller must give us the right bh. 6674 */ 6675 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb, 6676 struct buffer_head *bh, 6677 struct ocfs2_xattr_header *xh, 6678 int offset, 6679 struct ocfs2_xattr_value_root **xv, 6680 struct buffer_head **ret_bh, 6681 void *para) 6682 { 6683 struct ocfs2_reflink_xattr_tree_args *args = 6684 (struct ocfs2_reflink_xattr_tree_args *)para; 6685 struct ocfs2_xattr_bucket *bucket; 6686 6687 if (bh == args->old_bucket->bu_bhs[0]) 6688 bucket = args->old_bucket; 6689 else 6690 bucket = args->new_bucket; 6691 6692 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6693 xv, ret_bh); 6694 } 6695 6696 struct ocfs2_value_tree_metas { 6697 int num_metas; 6698 int credits; 6699 int num_recs; 6700 }; 6701 6702 static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb, 6703 struct buffer_head *bh, 6704 struct ocfs2_xattr_header *xh, 6705 int offset, 6706 struct ocfs2_xattr_value_root **xv, 6707 struct buffer_head **ret_bh, 6708 void *para) 6709 { 6710 struct ocfs2_xattr_bucket *bucket = 6711 (struct ocfs2_xattr_bucket *)para; 6712 6713 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6714 xv, ret_bh); 6715 } 6716 6717 static int ocfs2_calc_value_tree_metas(struct inode *inode, 6718 struct ocfs2_xattr_bucket *bucket, 6719 void *para) 6720 { 6721 struct ocfs2_value_tree_metas *metas = 6722 (struct ocfs2_value_tree_metas *)para; 6723 struct ocfs2_xattr_header *xh = 6724 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6725 6726 /* Add the credits for this bucket first. */ 6727 metas->credits += bucket->bu_blocks; 6728 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0], 6729 xh, &metas->num_metas, 6730 &metas->credits, &metas->num_recs, 6731 ocfs2_value_tree_metas_in_bucket, 6732 bucket); 6733 } 6734 6735 /* 6736 * Given a xattr extent rec starting from blkno and having len clusters, 6737 * iterate all the buckets calculate how much metadata we need for reflinking 6738 * all the ocfs2_xattr_value_root and lock the allocators accordingly. 6739 */ 6740 static int ocfs2_lock_reflink_xattr_rec_allocators( 6741 struct ocfs2_reflink_xattr_tree_args *args, 6742 struct ocfs2_extent_tree *xt_et, 6743 u64 blkno, u32 len, int *credits, 6744 struct ocfs2_alloc_context **meta_ac, 6745 struct ocfs2_alloc_context **data_ac) 6746 { 6747 int ret, num_free_extents; 6748 struct ocfs2_value_tree_metas metas; 6749 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb); 6750 struct ocfs2_refcount_block *rb; 6751 6752 memset(&metas, 0, sizeof(metas)); 6753 6754 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len, 6755 ocfs2_calc_value_tree_metas, &metas); 6756 if (ret) { 6757 mlog_errno(ret); 6758 goto out; 6759 } 6760 6761 *credits = metas.credits; 6762 6763 /* 6764 * Calculate we need for refcount tree change. 6765 * 6766 * We need to add/modify num_recs in refcount tree, so just calculate 6767 * an approximate number we need for refcount tree change. 6768 * Sometimes we need to split the tree, and after split, half recs 6769 * will be moved to the new block, and a new block can only provide 6770 * half number of recs. So we multiple new blocks by 2. 6771 * In the end, we have to add credits for modifying the already 6772 * existed refcount block. 6773 */ 6774 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data; 6775 metas.num_recs = 6776 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) / 6777 ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6778 metas.num_metas += metas.num_recs; 6779 *credits += metas.num_recs + 6780 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6781 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6782 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6783 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6784 else 6785 *credits += 1; 6786 6787 /* count in the xattr tree change. */ 6788 num_free_extents = ocfs2_num_free_extents(xt_et); 6789 if (num_free_extents < 0) { 6790 ret = num_free_extents; 6791 mlog_errno(ret); 6792 goto out; 6793 } 6794 6795 if (num_free_extents < len) 6796 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); 6797 6798 *credits += ocfs2_calc_extend_credits(osb->sb, 6799 xt_et->et_root_el); 6800 6801 if (metas.num_metas) { 6802 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, 6803 meta_ac); 6804 if (ret) { 6805 mlog_errno(ret); 6806 goto out; 6807 } 6808 } 6809 6810 if (len) { 6811 ret = ocfs2_reserve_clusters(osb, len, data_ac); 6812 if (ret) 6813 mlog_errno(ret); 6814 } 6815 out: 6816 if (ret) { 6817 if (*meta_ac) { 6818 ocfs2_free_alloc_context(*meta_ac); 6819 *meta_ac = NULL; 6820 } 6821 } 6822 6823 return ret; 6824 } 6825 6826 static int ocfs2_reflink_xattr_bucket(handle_t *handle, 6827 u64 blkno, u64 new_blkno, u32 clusters, 6828 u32 *cpos, int num_buckets, 6829 struct ocfs2_alloc_context *meta_ac, 6830 struct ocfs2_alloc_context *data_ac, 6831 struct ocfs2_reflink_xattr_tree_args *args) 6832 { 6833 int i, j, ret = 0; 6834 struct super_block *sb = args->reflink->old_inode->i_sb; 6835 int bpb = args->old_bucket->bu_blocks; 6836 struct ocfs2_xattr_value_buf vb = { 6837 .vb_access = ocfs2_journal_access, 6838 }; 6839 6840 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) { 6841 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6842 if (ret) { 6843 mlog_errno(ret); 6844 break; 6845 } 6846 6847 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1); 6848 if (ret) { 6849 mlog_errno(ret); 6850 break; 6851 } 6852 6853 ret = ocfs2_xattr_bucket_journal_access(handle, 6854 args->new_bucket, 6855 OCFS2_JOURNAL_ACCESS_CREATE); 6856 if (ret) { 6857 mlog_errno(ret); 6858 break; 6859 } 6860 6861 for (j = 0; j < bpb; j++) 6862 memcpy(bucket_block(args->new_bucket, j), 6863 bucket_block(args->old_bucket, j), 6864 sb->s_blocksize); 6865 6866 /* 6867 * Record the start cpos so that we can use it to initialize 6868 * our xattr tree we also set the xh_num_bucket for the new 6869 * bucket. 6870 */ 6871 if (i == 0) { 6872 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> 6873 xh_entries[0].xe_name_hash); 6874 bucket_xh(args->new_bucket)->xh_num_buckets = 6875 cpu_to_le16(num_buckets); 6876 } 6877 6878 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6879 6880 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6881 args->old_bucket->bu_bhs[0], 6882 bucket_xh(args->old_bucket), 6883 args->new_bucket->bu_bhs[0], 6884 bucket_xh(args->new_bucket), 6885 &vb, meta_ac, 6886 ocfs2_get_reflink_xattr_value_root, 6887 args); 6888 if (ret) { 6889 mlog_errno(ret); 6890 break; 6891 } 6892 6893 /* 6894 * Re-access and dirty the bucket to calculate metaecc. 6895 * Because we may extend the transaction in reflink_xattr_header 6896 * which will let the already accessed block gone. 6897 */ 6898 ret = ocfs2_xattr_bucket_journal_access(handle, 6899 args->new_bucket, 6900 OCFS2_JOURNAL_ACCESS_WRITE); 6901 if (ret) { 6902 mlog_errno(ret); 6903 break; 6904 } 6905 6906 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6907 6908 ocfs2_xattr_bucket_relse(args->old_bucket); 6909 ocfs2_xattr_bucket_relse(args->new_bucket); 6910 } 6911 6912 ocfs2_xattr_bucket_relse(args->old_bucket); 6913 ocfs2_xattr_bucket_relse(args->new_bucket); 6914 return ret; 6915 } 6916 6917 static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6918 struct inode *inode, 6919 struct ocfs2_reflink_xattr_tree_args *args, 6920 struct ocfs2_extent_tree *et, 6921 struct ocfs2_alloc_context *meta_ac, 6922 struct ocfs2_alloc_context *data_ac, 6923 u64 blkno, u32 cpos, u32 len) 6924 { 6925 int ret, first_inserted = 0; 6926 u32 p_cluster, num_clusters, reflink_cpos = 0; 6927 u64 new_blkno; 6928 unsigned int num_buckets, reflink_buckets; 6929 unsigned int bpc = 6930 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 6931 6932 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6933 if (ret) { 6934 mlog_errno(ret); 6935 goto out; 6936 } 6937 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); 6938 ocfs2_xattr_bucket_relse(args->old_bucket); 6939 6940 while (len && num_buckets) { 6941 ret = ocfs2_claim_clusters(handle, data_ac, 6942 1, &p_cluster, &num_clusters); 6943 if (ret) { 6944 mlog_errno(ret); 6945 goto out; 6946 } 6947 6948 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 6949 reflink_buckets = min(num_buckets, bpc * num_clusters); 6950 6951 ret = ocfs2_reflink_xattr_bucket(handle, blkno, 6952 new_blkno, num_clusters, 6953 &reflink_cpos, reflink_buckets, 6954 meta_ac, data_ac, args); 6955 if (ret) { 6956 mlog_errno(ret); 6957 goto out; 6958 } 6959 6960 /* 6961 * For the 1st allocated cluster, we make it use the same cpos 6962 * so that the xattr tree looks the same as the original one 6963 * in the most case. 6964 */ 6965 if (!first_inserted) { 6966 reflink_cpos = cpos; 6967 first_inserted = 1; 6968 } 6969 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 6970 num_clusters, 0, meta_ac); 6971 if (ret) 6972 mlog_errno(ret); 6973 6974 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno, 6975 num_clusters, reflink_cpos); 6976 6977 len -= num_clusters; 6978 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6979 num_buckets -= reflink_buckets; 6980 } 6981 out: 6982 return ret; 6983 } 6984 6985 /* 6986 * Create the same xattr extent record in the new inode's xattr tree. 6987 */ 6988 static int ocfs2_reflink_xattr_rec(struct inode *inode, 6989 struct buffer_head *root_bh, 6990 u64 blkno, 6991 u32 cpos, 6992 u32 len, 6993 void *para) 6994 { 6995 int ret, credits = 0; 6996 handle_t *handle; 6997 struct ocfs2_reflink_xattr_tree_args *args = 6998 (struct ocfs2_reflink_xattr_tree_args *)para; 6999 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7000 struct ocfs2_alloc_context *meta_ac = NULL; 7001 struct ocfs2_alloc_context *data_ac = NULL; 7002 struct ocfs2_extent_tree et; 7003 7004 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len); 7005 7006 ocfs2_init_xattr_tree_extent_tree(&et, 7007 INODE_CACHE(args->reflink->new_inode), 7008 args->new_blk_bh); 7009 7010 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 7011 len, &credits, 7012 &meta_ac, &data_ac); 7013 if (ret) { 7014 mlog_errno(ret); 7015 goto out; 7016 } 7017 7018 handle = ocfs2_start_trans(osb, credits); 7019 if (IS_ERR(handle)) { 7020 ret = PTR_ERR(handle); 7021 mlog_errno(ret); 7022 goto out; 7023 } 7024 7025 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7026 meta_ac, data_ac, 7027 blkno, cpos, len); 7028 if (ret) 7029 mlog_errno(ret); 7030 7031 ocfs2_commit_trans(osb, handle); 7032 7033 out: 7034 if (meta_ac) 7035 ocfs2_free_alloc_context(meta_ac); 7036 if (data_ac) 7037 ocfs2_free_alloc_context(data_ac); 7038 return ret; 7039 } 7040 7041 /* 7042 * Create reflinked xattr buckets. 7043 * We will add bucket one by one, and refcount all the xattrs in the bucket 7044 * if they are stored outside. 7045 */ 7046 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7047 struct buffer_head *blk_bh, 7048 struct buffer_head *new_blk_bh) 7049 { 7050 int ret; 7051 struct ocfs2_reflink_xattr_tree_args para; 7052 7053 memset(¶, 0, sizeof(para)); 7054 para.reflink = args; 7055 para.old_blk_bh = blk_bh; 7056 para.new_blk_bh = new_blk_bh; 7057 7058 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7059 if (!para.old_bucket) { 7060 mlog_errno(-ENOMEM); 7061 return -ENOMEM; 7062 } 7063 7064 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7065 if (!para.new_bucket) { 7066 ret = -ENOMEM; 7067 mlog_errno(ret); 7068 goto out; 7069 } 7070 7071 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7072 ocfs2_reflink_xattr_rec, 7073 ¶); 7074 if (ret) 7075 mlog_errno(ret); 7076 7077 out: 7078 ocfs2_xattr_bucket_free(para.old_bucket); 7079 ocfs2_xattr_bucket_free(para.new_bucket); 7080 return ret; 7081 } 7082 7083 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7084 struct buffer_head *blk_bh) 7085 { 7086 int ret, indexed = 0; 7087 struct buffer_head *new_blk_bh = NULL; 7088 struct ocfs2_xattr_block *xb = 7089 (struct ocfs2_xattr_block *)blk_bh->b_data; 7090 7091 7092 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7093 indexed = 1; 7094 7095 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7096 &new_blk_bh, indexed); 7097 if (ret) { 7098 mlog_errno(ret); 7099 goto out; 7100 } 7101 7102 if (!indexed) 7103 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7104 else 7105 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7106 if (ret) 7107 mlog_errno(ret); 7108 7109 out: 7110 brelse(new_blk_bh); 7111 return ret; 7112 } 7113 7114 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7115 { 7116 int type = ocfs2_xattr_get_type(xe); 7117 7118 return type != OCFS2_XATTR_INDEX_SECURITY && 7119 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && 7120 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; 7121 } 7122 7123 int ocfs2_reflink_xattrs(struct inode *old_inode, 7124 struct buffer_head *old_bh, 7125 struct inode *new_inode, 7126 struct buffer_head *new_bh, 7127 bool preserve_security) 7128 { 7129 int ret; 7130 struct ocfs2_xattr_reflink args; 7131 struct ocfs2_inode_info *oi = OCFS2_I(old_inode); 7132 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; 7133 struct buffer_head *blk_bh = NULL; 7134 struct ocfs2_cached_dealloc_ctxt dealloc; 7135 struct ocfs2_refcount_tree *ref_tree; 7136 struct buffer_head *ref_root_bh = NULL; 7137 7138 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7139 le64_to_cpu(di->i_refcount_loc), 7140 1, &ref_tree, &ref_root_bh); 7141 if (ret) { 7142 mlog_errno(ret); 7143 goto out; 7144 } 7145 7146 ocfs2_init_dealloc_ctxt(&dealloc); 7147 7148 args.old_inode = old_inode; 7149 args.new_inode = new_inode; 7150 args.old_bh = old_bh; 7151 args.new_bh = new_bh; 7152 args.ref_ci = &ref_tree->rf_ci; 7153 args.ref_root_bh = ref_root_bh; 7154 args.dealloc = &dealloc; 7155 if (preserve_security) 7156 args.xattr_reflinked = NULL; 7157 else 7158 args.xattr_reflinked = ocfs2_reflink_xattr_no_security; 7159 7160 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 7161 ret = ocfs2_reflink_xattr_inline(&args); 7162 if (ret) { 7163 mlog_errno(ret); 7164 goto out_unlock; 7165 } 7166 } 7167 7168 if (!di->i_xattr_loc) 7169 goto out_unlock; 7170 7171 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), 7172 &blk_bh); 7173 if (ret < 0) { 7174 mlog_errno(ret); 7175 goto out_unlock; 7176 } 7177 7178 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); 7179 if (ret) 7180 mlog_errno(ret); 7181 7182 brelse(blk_bh); 7183 7184 out_unlock: 7185 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7186 ref_tree, 1); 7187 brelse(ref_root_bh); 7188 7189 if (ocfs2_dealloc_has_cluster(&dealloc)) { 7190 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); 7191 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); 7192 } 7193 7194 out: 7195 return ret; 7196 } 7197 7198 /* 7199 * Initialize security and acl for a already created inode. 7200 * Used for reflink a non-preserve-security file. 7201 * 7202 * It uses common api like ocfs2_xattr_set, so the caller 7203 * must not hold any lock expect i_rwsem. 7204 */ 7205 int ocfs2_init_security_and_acl(struct inode *dir, 7206 struct inode *inode, 7207 const struct qstr *qstr) 7208 { 7209 int ret = 0; 7210 struct buffer_head *dir_bh = NULL; 7211 7212 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7213 if (ret) { 7214 mlog_errno(ret); 7215 goto leave; 7216 } 7217 7218 ret = ocfs2_inode_lock(dir, &dir_bh, 0); 7219 if (ret) { 7220 mlog_errno(ret); 7221 goto leave; 7222 } 7223 ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); 7224 if (ret) 7225 mlog_errno(ret); 7226 7227 ocfs2_inode_unlock(dir, 0); 7228 brelse(dir_bh); 7229 leave: 7230 return ret; 7231 } 7232 7233 /* 7234 * 'security' attributes support 7235 */ 7236 static int ocfs2_xattr_security_get(const struct xattr_handler *handler, 7237 struct dentry *unused, struct inode *inode, 7238 const char *name, void *buffer, size_t size) 7239 { 7240 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, 7241 name, buffer, size); 7242 } 7243 7244 static int ocfs2_xattr_security_set(const struct xattr_handler *handler, 7245 struct mnt_idmap *idmap, 7246 struct dentry *unused, struct inode *inode, 7247 const char *name, const void *value, 7248 size_t size, int flags) 7249 { 7250 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7251 name, value, size, flags); 7252 } 7253 7254 static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 7255 void *fs_info) 7256 { 7257 struct ocfs2_security_xattr_info *si = fs_info; 7258 const struct xattr *xattr; 7259 int err = 0; 7260 7261 if (si) { 7262 si->value = kmemdup(xattr_array->value, xattr_array->value_len, 7263 GFP_KERNEL); 7264 if (!si->value) 7265 return -ENOMEM; 7266 7267 si->name = xattr_array->name; 7268 si->value_len = xattr_array->value_len; 7269 return 0; 7270 } 7271 7272 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 7273 err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7274 xattr->name, xattr->value, 7275 xattr->value_len, XATTR_CREATE); 7276 if (err) 7277 break; 7278 } 7279 return err; 7280 } 7281 7282 int ocfs2_init_security_get(struct inode *inode, 7283 struct inode *dir, 7284 const struct qstr *qstr, 7285 struct ocfs2_security_xattr_info *si) 7286 { 7287 int ret; 7288 7289 /* check whether ocfs2 support feature xattr */ 7290 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7291 return -EOPNOTSUPP; 7292 if (si) { 7293 ret = security_inode_init_security(inode, dir, qstr, 7294 &ocfs2_initxattrs, si); 7295 /* 7296 * security_inode_init_security() does not return -EOPNOTSUPP, 7297 * we have to check the xattr ourselves. 7298 */ 7299 if (!ret && !si->name) 7300 si->enable = 0; 7301 7302 return ret; 7303 } 7304 7305 return security_inode_init_security(inode, dir, qstr, 7306 &ocfs2_initxattrs, NULL); 7307 } 7308 7309 int ocfs2_init_security_set(handle_t *handle, 7310 struct inode *inode, 7311 struct buffer_head *di_bh, 7312 struct ocfs2_security_xattr_info *si, 7313 struct ocfs2_alloc_context *xattr_ac, 7314 struct ocfs2_alloc_context *data_ac) 7315 { 7316 return ocfs2_xattr_set_handle(handle, inode, di_bh, 7317 OCFS2_XATTR_INDEX_SECURITY, 7318 si->name, si->value, si->value_len, 0, 7319 xattr_ac, data_ac); 7320 } 7321 7322 const struct xattr_handler ocfs2_xattr_security_handler = { 7323 .prefix = XATTR_SECURITY_PREFIX, 7324 .get = ocfs2_xattr_security_get, 7325 .set = ocfs2_xattr_security_set, 7326 }; 7327 7328 /* 7329 * 'trusted' attributes support 7330 */ 7331 static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, 7332 struct dentry *unused, struct inode *inode, 7333 const char *name, void *buffer, size_t size) 7334 { 7335 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, 7336 name, buffer, size); 7337 } 7338 7339 static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler, 7340 struct mnt_idmap *idmap, 7341 struct dentry *unused, struct inode *inode, 7342 const char *name, const void *value, 7343 size_t size, int flags) 7344 { 7345 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, 7346 name, value, size, flags); 7347 } 7348 7349 const struct xattr_handler ocfs2_xattr_trusted_handler = { 7350 .prefix = XATTR_TRUSTED_PREFIX, 7351 .get = ocfs2_xattr_trusted_get, 7352 .set = ocfs2_xattr_trusted_set, 7353 }; 7354 7355 /* 7356 * 'user' attributes support 7357 */ 7358 static int ocfs2_xattr_user_get(const struct xattr_handler *handler, 7359 struct dentry *unused, struct inode *inode, 7360 const char *name, void *buffer, size_t size) 7361 { 7362 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7363 7364 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7365 return -EOPNOTSUPP; 7366 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, 7367 buffer, size); 7368 } 7369 7370 static int ocfs2_xattr_user_set(const struct xattr_handler *handler, 7371 struct mnt_idmap *idmap, 7372 struct dentry *unused, struct inode *inode, 7373 const char *name, const void *value, 7374 size_t size, int flags) 7375 { 7376 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7377 7378 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7379 return -EOPNOTSUPP; 7380 7381 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, 7382 name, value, size, flags); 7383 } 7384 7385 const struct xattr_handler ocfs2_xattr_user_handler = { 7386 .prefix = XATTR_USER_PREFIX, 7387 .get = ocfs2_xattr_user_get, 7388 .set = ocfs2_xattr_user_set, 7389 }; 7390