1 // SPDX-License-Identifier: GPL-2.0-only 2 /* -*- mode: c; c-basic-offset: 8; -*- 3 * vim: noexpandtab sw=8 ts=8 sts=0: 4 * 5 * xattr.c 6 * 7 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 8 * 9 * CREDITS: 10 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 11 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 12 */ 13 14 #include <linux/capability.h> 15 #include <linux/fs.h> 16 #include <linux/types.h> 17 #include <linux/slab.h> 18 #include <linux/highmem.h> 19 #include <linux/pagemap.h> 20 #include <linux/uio.h> 21 #include <linux/sched.h> 22 #include <linux/splice.h> 23 #include <linux/mount.h> 24 #include <linux/writeback.h> 25 #include <linux/falloc.h> 26 #include <linux/sort.h> 27 #include <linux/init.h> 28 #include <linux/module.h> 29 #include <linux/string.h> 30 #include <linux/security.h> 31 32 #include <cluster/masklog.h> 33 34 #include "ocfs2.h" 35 #include "alloc.h" 36 #include "blockcheck.h" 37 #include "dlmglue.h" 38 #include "file.h" 39 #include "symlink.h" 40 #include "sysfile.h" 41 #include "inode.h" 42 #include "journal.h" 43 #include "ocfs2_fs.h" 44 #include "suballoc.h" 45 #include "uptodate.h" 46 #include "buffer_head_io.h" 47 #include "super.h" 48 #include "xattr.h" 49 #include "refcounttree.h" 50 #include "acl.h" 51 #include "ocfs2_trace.h" 52 53 struct ocfs2_xattr_def_value_root { 54 struct ocfs2_xattr_value_root xv; 55 struct ocfs2_extent_rec er; 56 }; 57 58 struct ocfs2_xattr_bucket { 59 /* The inode these xattrs are associated with */ 60 struct inode *bu_inode; 61 62 /* The actual buffers that make up the bucket */ 63 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 64 65 /* How many blocks make up one bucket for this filesystem */ 66 int bu_blocks; 67 }; 68 69 struct ocfs2_xattr_set_ctxt { 70 handle_t *handle; 71 struct ocfs2_alloc_context *meta_ac; 72 struct ocfs2_alloc_context *data_ac; 73 struct ocfs2_cached_dealloc_ctxt dealloc; 74 int set_abort; 75 }; 76 77 #define 
OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 78 #define OCFS2_XATTR_INLINE_SIZE 80 79 #define OCFS2_XATTR_HEADER_GAP 4 80 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 81 - sizeof(struct ocfs2_xattr_header) \ 82 - OCFS2_XATTR_HEADER_GAP) 83 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 84 - sizeof(struct ocfs2_xattr_block) \ 85 - sizeof(struct ocfs2_xattr_header) \ 86 - OCFS2_XATTR_HEADER_GAP) 87 88 static struct ocfs2_xattr_def_value_root def_xv = { 89 .xv.xr_list.l_count = cpu_to_le16(1), 90 }; 91 92 const struct xattr_handler *ocfs2_xattr_handlers[] = { 93 &ocfs2_xattr_user_handler, 94 &posix_acl_access_xattr_handler, 95 &posix_acl_default_xattr_handler, 96 &ocfs2_xattr_trusted_handler, 97 &ocfs2_xattr_security_handler, 98 NULL 99 }; 100 101 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 102 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 103 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 104 = &posix_acl_access_xattr_handler, 105 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 106 = &posix_acl_default_xattr_handler, 107 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 108 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 109 }; 110 111 struct ocfs2_xattr_info { 112 int xi_name_index; 113 const char *xi_name; 114 int xi_name_len; 115 const void *xi_value; 116 size_t xi_value_len; 117 }; 118 119 struct ocfs2_xattr_search { 120 struct buffer_head *inode_bh; 121 /* 122 * xattr_bh point to the block buffer head which has extended attribute 123 * when extended attribute in inode, xattr_bh is equal to inode_bh. 
124 */ 125 struct buffer_head *xattr_bh; 126 struct ocfs2_xattr_header *header; 127 struct ocfs2_xattr_bucket *bucket; 128 void *base; 129 void *end; 130 struct ocfs2_xattr_entry *here; 131 int not_found; 132 }; 133 134 /* Operations on struct ocfs2_xa_entry */ 135 struct ocfs2_xa_loc; 136 struct ocfs2_xa_loc_operations { 137 /* 138 * Journal functions 139 */ 140 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 141 int type); 142 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 143 144 /* 145 * Return a pointer to the appropriate buffer in loc->xl_storage 146 * at the given offset from loc->xl_header. 147 */ 148 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 149 150 /* Can we reuse the existing entry for the new value? */ 151 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 152 struct ocfs2_xattr_info *xi); 153 154 /* How much space is needed for the new value? */ 155 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 156 struct ocfs2_xattr_info *xi); 157 158 /* 159 * Return the offset of the first name+value pair. This is 160 * the start of our downward-filling free space. 161 */ 162 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 163 164 /* 165 * Remove the name+value at this location. Do whatever is 166 * appropriate with the remaining name+value pairs. 167 */ 168 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 169 170 /* Fill xl_entry with a new entry */ 171 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 172 173 /* Add name+value storage to an entry */ 174 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 175 176 /* 177 * Initialize the value buf's access and bh fields for this entry. 178 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 179 */ 180 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 181 struct ocfs2_xattr_value_buf *vb); 182 }; 183 184 /* 185 * Describes an xattr entry location. 
This is a memory structure 186 * tracking the on-disk structure. 187 */ 188 struct ocfs2_xa_loc { 189 /* This xattr belongs to this inode */ 190 struct inode *xl_inode; 191 192 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 193 struct ocfs2_xattr_header *xl_header; 194 195 /* Bytes from xl_header to the end of the storage */ 196 int xl_size; 197 198 /* 199 * The ocfs2_xattr_entry this location describes. If this is 200 * NULL, this location describes the on-disk structure where it 201 * would have been. 202 */ 203 struct ocfs2_xattr_entry *xl_entry; 204 205 /* 206 * Internal housekeeping 207 */ 208 209 /* Buffer(s) containing this entry */ 210 void *xl_storage; 211 212 /* Operations on the storage backing this location */ 213 const struct ocfs2_xa_loc_operations *xl_ops; 214 }; 215 216 /* 217 * Convenience functions to calculate how much space is needed for a 218 * given name+value pair 219 */ 220 static int namevalue_size(int name_len, uint64_t value_len) 221 { 222 if (value_len > OCFS2_XATTR_INLINE_SIZE) 223 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 224 else 225 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 226 } 227 228 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 229 { 230 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 231 } 232 233 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 234 { 235 u64 value_len = le64_to_cpu(xe->xe_value_size); 236 237 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 238 ocfs2_xattr_is_local(xe)); 239 return namevalue_size(xe->xe_name_len, value_len); 240 } 241 242 243 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 244 struct ocfs2_xattr_header *xh, 245 int index, 246 int *block_off, 247 int *new_offset); 248 249 static int ocfs2_xattr_block_find(struct inode *inode, 250 int name_index, 251 const char *name, 252 struct ocfs2_xattr_search *xs); 253 static int ocfs2_xattr_index_block_find(struct inode *inode, 254 struct 
buffer_head *root_bh, 255 int name_index, 256 const char *name, 257 struct ocfs2_xattr_search *xs); 258 259 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 260 struct buffer_head *blk_bh, 261 char *buffer, 262 size_t buffer_size); 263 264 static int ocfs2_xattr_create_index_block(struct inode *inode, 265 struct ocfs2_xattr_search *xs, 266 struct ocfs2_xattr_set_ctxt *ctxt); 267 268 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 269 struct ocfs2_xattr_info *xi, 270 struct ocfs2_xattr_search *xs, 271 struct ocfs2_xattr_set_ctxt *ctxt); 272 273 typedef int (xattr_tree_rec_func)(struct inode *inode, 274 struct buffer_head *root_bh, 275 u64 blkno, u32 cpos, u32 len, void *para); 276 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 277 struct buffer_head *root_bh, 278 xattr_tree_rec_func *rec_func, 279 void *para); 280 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 281 struct ocfs2_xattr_bucket *bucket, 282 void *para); 283 static int ocfs2_rm_xattr_cluster(struct inode *inode, 284 struct buffer_head *root_bh, 285 u64 blkno, 286 u32 cpos, 287 u32 len, 288 void *para); 289 290 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 291 u64 src_blk, u64 last_blk, u64 to_blk, 292 unsigned int start_bucket, 293 u32 *first_hash); 294 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 295 struct ocfs2_dinode *di, 296 struct ocfs2_xattr_info *xi, 297 struct ocfs2_xattr_search *xis, 298 struct ocfs2_xattr_search *xbs, 299 struct ocfs2_refcount_tree **ref_tree, 300 int *meta_need, 301 int *credits); 302 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 303 struct ocfs2_xattr_bucket *bucket, 304 int offset, 305 struct ocfs2_xattr_value_root **xv, 306 struct buffer_head **bh); 307 308 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 309 { 310 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 311 } 312 313 static inline u16 
ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 314 { 315 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 316 } 317 318 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 319 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 320 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 321 322 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 323 { 324 struct ocfs2_xattr_bucket *bucket; 325 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 326 327 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 328 329 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); 330 if (bucket) { 331 bucket->bu_inode = inode; 332 bucket->bu_blocks = blks; 333 } 334 335 return bucket; 336 } 337 338 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 339 { 340 int i; 341 342 for (i = 0; i < bucket->bu_blocks; i++) { 343 brelse(bucket->bu_bhs[i]); 344 bucket->bu_bhs[i] = NULL; 345 } 346 } 347 348 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 349 { 350 if (bucket) { 351 ocfs2_xattr_bucket_relse(bucket); 352 bucket->bu_inode = NULL; 353 kfree(bucket); 354 } 355 } 356 357 /* 358 * A bucket that has never been written to disk doesn't need to be 359 * read. We just need the buffer_heads. Don't call this for 360 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 361 * them fully. 
362 */ 363 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 364 u64 xb_blkno, int new) 365 { 366 int i, rc = 0; 367 368 for (i = 0; i < bucket->bu_blocks; i++) { 369 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 370 xb_blkno + i); 371 if (!bucket->bu_bhs[i]) { 372 rc = -ENOMEM; 373 mlog_errno(rc); 374 break; 375 } 376 377 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 378 bucket->bu_bhs[i])) { 379 if (new) 380 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 381 bucket->bu_bhs[i]); 382 else { 383 set_buffer_uptodate(bucket->bu_bhs[i]); 384 ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 385 bucket->bu_bhs[i]); 386 } 387 } 388 } 389 390 if (rc) 391 ocfs2_xattr_bucket_relse(bucket); 392 return rc; 393 } 394 395 /* Read the xattr bucket at xb_blkno */ 396 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 397 u64 xb_blkno) 398 { 399 int rc; 400 401 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 402 bucket->bu_blocks, bucket->bu_bhs, 0, 403 NULL); 404 if (!rc) { 405 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 406 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 407 bucket->bu_bhs, 408 bucket->bu_blocks, 409 &bucket_xh(bucket)->xh_check); 410 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 411 if (rc) 412 mlog_errno(rc); 413 } 414 415 if (rc) 416 ocfs2_xattr_bucket_relse(bucket); 417 return rc; 418 } 419 420 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 421 struct ocfs2_xattr_bucket *bucket, 422 int type) 423 { 424 int i, rc = 0; 425 426 for (i = 0; i < bucket->bu_blocks; i++) { 427 rc = ocfs2_journal_access(handle, 428 INODE_CACHE(bucket->bu_inode), 429 bucket->bu_bhs[i], type); 430 if (rc) { 431 mlog_errno(rc); 432 break; 433 } 434 } 435 436 return rc; 437 } 438 439 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 440 struct ocfs2_xattr_bucket *bucket) 441 { 442 int i; 443 444 
spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 445 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 446 bucket->bu_bhs, bucket->bu_blocks, 447 &bucket_xh(bucket)->xh_check); 448 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 449 450 for (i = 0; i < bucket->bu_blocks; i++) 451 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 452 } 453 454 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 455 struct ocfs2_xattr_bucket *src) 456 { 457 int i; 458 int blocksize = src->bu_inode->i_sb->s_blocksize; 459 460 BUG_ON(dest->bu_blocks != src->bu_blocks); 461 BUG_ON(dest->bu_inode != src->bu_inode); 462 463 for (i = 0; i < src->bu_blocks; i++) { 464 memcpy(bucket_block(dest, i), bucket_block(src, i), 465 blocksize); 466 } 467 } 468 469 static int ocfs2_validate_xattr_block(struct super_block *sb, 470 struct buffer_head *bh) 471 { 472 int rc; 473 struct ocfs2_xattr_block *xb = 474 (struct ocfs2_xattr_block *)bh->b_data; 475 476 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); 477 478 BUG_ON(!buffer_uptodate(bh)); 479 480 /* 481 * If the ecc fails, we return the error but otherwise 482 * leave the filesystem running. We know any error is 483 * local to this block. 
484 */ 485 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 486 if (rc) 487 return rc; 488 489 /* 490 * Errors after here are fatal 491 */ 492 493 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 494 return ocfs2_error(sb, 495 "Extended attribute block #%llu has bad signature %.*s\n", 496 (unsigned long long)bh->b_blocknr, 7, 497 xb->xb_signature); 498 } 499 500 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 501 return ocfs2_error(sb, 502 "Extended attribute block #%llu has an invalid xb_blkno of %llu\n", 503 (unsigned long long)bh->b_blocknr, 504 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 505 } 506 507 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 508 return ocfs2_error(sb, 509 "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n", 510 (unsigned long long)bh->b_blocknr, 511 le32_to_cpu(xb->xb_fs_generation)); 512 } 513 514 return 0; 515 } 516 517 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 518 struct buffer_head **bh) 519 { 520 int rc; 521 struct buffer_head *tmp = *bh; 522 523 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 524 ocfs2_validate_xattr_block); 525 526 /* If ocfs2_read_block() got us a new bh, pass it up. */ 527 if (!rc && !*bh) 528 *bh = tmp; 529 530 return rc; 531 } 532 533 static inline const char *ocfs2_xattr_prefix(int name_index) 534 { 535 const struct xattr_handler *handler = NULL; 536 537 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 538 handler = ocfs2_xattr_handler_map[name_index]; 539 return handler ? 
xattr_prefix(handler) : NULL; 540 } 541 542 static u32 ocfs2_xattr_name_hash(struct inode *inode, 543 const char *name, 544 int name_len) 545 { 546 /* Get hash value of uuid from super block */ 547 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 548 int i; 549 550 /* hash extended attribute name */ 551 for (i = 0; i < name_len; i++) { 552 hash = (hash << OCFS2_HASH_SHIFT) ^ 553 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 554 *name++; 555 } 556 557 return hash; 558 } 559 560 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 561 { 562 return namevalue_size(name_len, value_len) + 563 sizeof(struct ocfs2_xattr_entry); 564 } 565 566 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 567 { 568 return namevalue_size_xi(xi) + 569 sizeof(struct ocfs2_xattr_entry); 570 } 571 572 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 573 { 574 return namevalue_size_xe(xe) + 575 sizeof(struct ocfs2_xattr_entry); 576 } 577 578 int ocfs2_calc_security_init(struct inode *dir, 579 struct ocfs2_security_xattr_info *si, 580 int *want_clusters, 581 int *xattr_credits, 582 struct ocfs2_alloc_context **xattr_ac) 583 { 584 int ret = 0; 585 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 586 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 587 si->value_len); 588 589 /* 590 * The max space of security xattr taken inline is 591 * 256(name) + 80(value) + 16(entry) = 352 bytes, 592 * So reserve one metadata block for it is ok. 
593 */ 594 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 595 s_size > OCFS2_XATTR_FREE_IN_IBODY) { 596 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 597 if (ret) { 598 mlog_errno(ret); 599 return ret; 600 } 601 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 602 } 603 604 /* reserve clusters for xattr value which will be set in B tree*/ 605 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { 606 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 607 si->value_len); 608 609 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 610 new_clusters); 611 *want_clusters += new_clusters; 612 } 613 return ret; 614 } 615 616 int ocfs2_calc_xattr_init(struct inode *dir, 617 struct buffer_head *dir_bh, 618 umode_t mode, 619 struct ocfs2_security_xattr_info *si, 620 int *want_clusters, 621 int *xattr_credits, 622 int *want_meta) 623 { 624 int ret = 0; 625 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 626 int s_size = 0, a_size = 0, acl_len = 0, new_clusters; 627 628 if (si->enable) 629 s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 630 si->value_len); 631 632 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 633 down_read(&OCFS2_I(dir)->ip_xattr_sem); 634 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 635 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 636 "", NULL, 0); 637 up_read(&OCFS2_I(dir)->ip_xattr_sem); 638 if (acl_len > 0) { 639 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 640 if (S_ISDIR(mode)) 641 a_size <<= 1; 642 } else if (acl_len != 0 && acl_len != -ENODATA) { 643 ret = acl_len; 644 mlog_errno(ret); 645 return ret; 646 } 647 } 648 649 if (!(s_size + a_size)) 650 return ret; 651 652 /* 653 * The max space of security xattr taken inline is 654 * 256(name) + 80(value) + 16(entry) = 352 bytes, 655 * The max space of acl xattr taken inline is 656 * 80(value) + 16(entry) * 2(if directory) = 192 bytes, 657 * when blocksize = 512, may reserve one more cluser for 658 * xattr bucket, otherwise reserve one metadata block 659 * for them is ok. 
660 * If this is a new directory with inline data, 661 * we choose to reserve the entire inline area for 662 * directory contents and force an external xattr block. 663 */ 664 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 665 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || 666 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 667 *want_meta = *want_meta + 1; 668 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 669 } 670 671 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && 672 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { 673 *want_clusters += 1; 674 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); 675 } 676 677 /* 678 * reserve credits and clusters for xattrs which has large value 679 * and have to be set outside 680 */ 681 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { 682 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 683 si->value_len); 684 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 685 new_clusters); 686 *want_clusters += new_clusters; 687 } 688 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && 689 acl_len > OCFS2_XATTR_INLINE_SIZE) { 690 /* for directory, it has DEFAULT and ACCESS two types of acls */ 691 new_clusters = (S_ISDIR(mode) ? 
2 : 1) * 692 ocfs2_clusters_for_bytes(dir->i_sb, acl_len); 693 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 694 new_clusters); 695 *want_clusters += new_clusters; 696 } 697 698 return ret; 699 } 700 701 static int ocfs2_xattr_extend_allocation(struct inode *inode, 702 u32 clusters_to_add, 703 struct ocfs2_xattr_value_buf *vb, 704 struct ocfs2_xattr_set_ctxt *ctxt) 705 { 706 int status = 0, credits; 707 handle_t *handle = ctxt->handle; 708 enum ocfs2_alloc_restarted why; 709 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 710 struct ocfs2_extent_tree et; 711 712 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 713 714 while (clusters_to_add) { 715 trace_ocfs2_xattr_extend_allocation(clusters_to_add); 716 717 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 718 OCFS2_JOURNAL_ACCESS_WRITE); 719 if (status < 0) { 720 mlog_errno(status); 721 break; 722 } 723 724 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 725 status = ocfs2_add_clusters_in_btree(handle, 726 &et, 727 &logical_start, 728 clusters_to_add, 729 0, 730 ctxt->data_ac, 731 ctxt->meta_ac, 732 &why); 733 if ((status < 0) && (status != -EAGAIN)) { 734 if (status != -ENOSPC) 735 mlog_errno(status); 736 break; 737 } 738 739 ocfs2_journal_dirty(handle, vb->vb_bh); 740 741 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - 742 prev_clusters; 743 744 if (why != RESTART_NONE && clusters_to_add) { 745 /* 746 * We can only fail in case the alloc file doesn't give 747 * up enough clusters. 
748 */ 749 BUG_ON(why == RESTART_META); 750 751 credits = ocfs2_calc_extend_credits(inode->i_sb, 752 &vb->vb_xv->xr_list); 753 status = ocfs2_extend_trans(handle, credits); 754 if (status < 0) { 755 status = -ENOMEM; 756 mlog_errno(status); 757 break; 758 } 759 } 760 } 761 762 return status; 763 } 764 765 static int __ocfs2_remove_xattr_range(struct inode *inode, 766 struct ocfs2_xattr_value_buf *vb, 767 u32 cpos, u32 phys_cpos, u32 len, 768 unsigned int ext_flags, 769 struct ocfs2_xattr_set_ctxt *ctxt) 770 { 771 int ret; 772 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 773 handle_t *handle = ctxt->handle; 774 struct ocfs2_extent_tree et; 775 776 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 777 778 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 779 OCFS2_JOURNAL_ACCESS_WRITE); 780 if (ret) { 781 mlog_errno(ret); 782 goto out; 783 } 784 785 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, 786 &ctxt->dealloc); 787 if (ret) { 788 mlog_errno(ret); 789 goto out; 790 } 791 792 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 793 ocfs2_journal_dirty(handle, vb->vb_bh); 794 795 if (ext_flags & OCFS2_EXT_REFCOUNTED) 796 ret = ocfs2_decrease_refcount(inode, handle, 797 ocfs2_blocks_to_clusters(inode->i_sb, 798 phys_blkno), 799 len, ctxt->meta_ac, &ctxt->dealloc, 1); 800 else 801 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, 802 phys_blkno, len); 803 if (ret) 804 mlog_errno(ret); 805 806 out: 807 return ret; 808 } 809 810 static int ocfs2_xattr_shrink_size(struct inode *inode, 811 u32 old_clusters, 812 u32 new_clusters, 813 struct ocfs2_xattr_value_buf *vb, 814 struct ocfs2_xattr_set_ctxt *ctxt) 815 { 816 int ret = 0; 817 unsigned int ext_flags; 818 u32 trunc_len, cpos, phys_cpos, alloc_size; 819 u64 block; 820 821 if (old_clusters <= new_clusters) 822 return 0; 823 824 cpos = new_clusters; 825 trunc_len = old_clusters - new_clusters; 826 while (trunc_len) { 827 ret = ocfs2_xattr_get_clusters(inode, cpos, 
&phys_cpos, 828 &alloc_size, 829 &vb->vb_xv->xr_list, &ext_flags); 830 if (ret) { 831 mlog_errno(ret); 832 goto out; 833 } 834 835 if (alloc_size > trunc_len) 836 alloc_size = trunc_len; 837 838 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 839 phys_cpos, alloc_size, 840 ext_flags, ctxt); 841 if (ret) { 842 mlog_errno(ret); 843 goto out; 844 } 845 846 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 847 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 848 block, alloc_size); 849 cpos += alloc_size; 850 trunc_len -= alloc_size; 851 } 852 853 out: 854 return ret; 855 } 856 857 static int ocfs2_xattr_value_truncate(struct inode *inode, 858 struct ocfs2_xattr_value_buf *vb, 859 int len, 860 struct ocfs2_xattr_set_ctxt *ctxt) 861 { 862 int ret; 863 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 864 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 865 866 if (new_clusters == old_clusters) 867 return 0; 868 869 if (new_clusters > old_clusters) 870 ret = ocfs2_xattr_extend_allocation(inode, 871 new_clusters - old_clusters, 872 vb, ctxt); 873 else 874 ret = ocfs2_xattr_shrink_size(inode, 875 old_clusters, new_clusters, 876 vb, ctxt); 877 878 return ret; 879 } 880 881 static int ocfs2_xattr_list_entry(struct super_block *sb, 882 char *buffer, size_t size, 883 size_t *result, int type, 884 const char *name, int name_len) 885 { 886 char *p = buffer + *result; 887 const char *prefix; 888 int prefix_len; 889 int total_len; 890 891 switch(type) { 892 case OCFS2_XATTR_INDEX_USER: 893 if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 894 return 0; 895 break; 896 897 case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS: 898 case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT: 899 if (!(sb->s_flags & SB_POSIXACL)) 900 return 0; 901 break; 902 903 case OCFS2_XATTR_INDEX_TRUSTED: 904 if (!capable(CAP_SYS_ADMIN)) 905 return 0; 906 break; 907 } 908 909 prefix = ocfs2_xattr_prefix(type); 910 if (!prefix) 911 return 0; 912 prefix_len = strlen(prefix); 913 
total_len = prefix_len + name_len + 1; 914 *result += total_len; 915 916 /* we are just looking for how big our buffer needs to be */ 917 if (!size) 918 return 0; 919 920 if (*result > size) 921 return -ERANGE; 922 923 memcpy(p, prefix, prefix_len); 924 memcpy(p + prefix_len, name, name_len); 925 p[prefix_len + name_len] = '\0'; 926 927 return 0; 928 } 929 930 static int ocfs2_xattr_list_entries(struct inode *inode, 931 struct ocfs2_xattr_header *header, 932 char *buffer, size_t buffer_size) 933 { 934 size_t result = 0; 935 int i, type, ret; 936 const char *name; 937 938 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 939 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 940 type = ocfs2_xattr_get_type(entry); 941 name = (const char *)header + 942 le16_to_cpu(entry->xe_name_offset); 943 944 ret = ocfs2_xattr_list_entry(inode->i_sb, 945 buffer, buffer_size, 946 &result, type, name, 947 entry->xe_name_len); 948 if (ret) 949 return ret; 950 } 951 952 return result; 953 } 954 955 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 956 struct ocfs2_dinode *di) 957 { 958 struct ocfs2_xattr_header *xh; 959 int i; 960 961 xh = (struct ocfs2_xattr_header *) 962 ((void *)di + inode->i_sb->s_blocksize - 963 le16_to_cpu(di->i_xattr_inline_size)); 964 965 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 966 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 967 return 1; 968 969 return 0; 970 } 971 972 static int ocfs2_xattr_ibody_list(struct inode *inode, 973 struct ocfs2_dinode *di, 974 char *buffer, 975 size_t buffer_size) 976 { 977 struct ocfs2_xattr_header *header = NULL; 978 struct ocfs2_inode_info *oi = OCFS2_I(inode); 979 int ret = 0; 980 981 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 982 return ret; 983 984 header = (struct ocfs2_xattr_header *) 985 ((void *)di + inode->i_sb->s_blocksize - 986 le16_to_cpu(di->i_xattr_inline_size)); 987 988 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 989 990 return ret; 991 } 992 993 
static int ocfs2_xattr_block_list(struct inode *inode, 994 struct ocfs2_dinode *di, 995 char *buffer, 996 size_t buffer_size) 997 { 998 struct buffer_head *blk_bh = NULL; 999 struct ocfs2_xattr_block *xb; 1000 int ret = 0; 1001 1002 if (!di->i_xattr_loc) 1003 return ret; 1004 1005 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 1006 &blk_bh); 1007 if (ret < 0) { 1008 mlog_errno(ret); 1009 return ret; 1010 } 1011 1012 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1013 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1014 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 1015 ret = ocfs2_xattr_list_entries(inode, header, 1016 buffer, buffer_size); 1017 } else 1018 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1019 buffer, buffer_size); 1020 1021 brelse(blk_bh); 1022 1023 return ret; 1024 } 1025 1026 ssize_t ocfs2_listxattr(struct dentry *dentry, 1027 char *buffer, 1028 size_t size) 1029 { 1030 int ret = 0, i_ret = 0, b_ret = 0; 1031 struct buffer_head *di_bh = NULL; 1032 struct ocfs2_dinode *di = NULL; 1033 struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry)); 1034 1035 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) 1036 return -EOPNOTSUPP; 1037 1038 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1039 return ret; 1040 1041 ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0); 1042 if (ret < 0) { 1043 mlog_errno(ret); 1044 return ret; 1045 } 1046 1047 di = (struct ocfs2_dinode *)di_bh->b_data; 1048 1049 down_read(&oi->ip_xattr_sem); 1050 i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size); 1051 if (i_ret < 0) 1052 b_ret = 0; 1053 else { 1054 if (buffer) { 1055 buffer += i_ret; 1056 size -= i_ret; 1057 } 1058 b_ret = ocfs2_xattr_block_list(d_inode(dentry), di, 1059 buffer, size); 1060 if (b_ret < 0) 1061 i_ret = 0; 1062 } 1063 up_read(&oi->ip_xattr_sem); 1064 ocfs2_inode_unlock(d_inode(dentry), 0); 1065 1066 brelse(di_bh); 1067 1068 return i_ret + b_ret; 1069 } 1070 1071 static int 
ocfs2_xattr_find_entry(int name_index, 1072 const char *name, 1073 struct ocfs2_xattr_search *xs) 1074 { 1075 struct ocfs2_xattr_entry *entry; 1076 size_t name_len; 1077 int i, cmp = 1; 1078 1079 if (name == NULL) 1080 return -EINVAL; 1081 1082 name_len = strlen(name); 1083 entry = xs->here; 1084 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 1085 cmp = name_index - ocfs2_xattr_get_type(entry); 1086 if (!cmp) 1087 cmp = name_len - entry->xe_name_len; 1088 if (!cmp) 1089 cmp = memcmp(name, (xs->base + 1090 le16_to_cpu(entry->xe_name_offset)), 1091 name_len); 1092 if (cmp == 0) 1093 break; 1094 entry += 1; 1095 } 1096 xs->here = entry; 1097 1098 return cmp ? -ENODATA : 0; 1099 } 1100 1101 static int ocfs2_xattr_get_value_outside(struct inode *inode, 1102 struct ocfs2_xattr_value_root *xv, 1103 void *buffer, 1104 size_t len) 1105 { 1106 u32 cpos, p_cluster, num_clusters, bpc, clusters; 1107 u64 blkno; 1108 int i, ret = 0; 1109 size_t cplen, blocksize; 1110 struct buffer_head *bh = NULL; 1111 struct ocfs2_extent_list *el; 1112 1113 el = &xv->xr_list; 1114 clusters = le32_to_cpu(xv->xr_clusters); 1115 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1116 blocksize = inode->i_sb->s_blocksize; 1117 1118 cpos = 0; 1119 while (cpos < clusters) { 1120 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1121 &num_clusters, el, NULL); 1122 if (ret) { 1123 mlog_errno(ret); 1124 goto out; 1125 } 1126 1127 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1128 /* Copy ocfs2_xattr_value */ 1129 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1130 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1131 &bh, NULL); 1132 if (ret) { 1133 mlog_errno(ret); 1134 goto out; 1135 } 1136 1137 cplen = len >= blocksize ? 
blocksize : len; 1138 memcpy(buffer, bh->b_data, cplen); 1139 len -= cplen; 1140 buffer += cplen; 1141 1142 brelse(bh); 1143 bh = NULL; 1144 if (len == 0) 1145 break; 1146 } 1147 cpos += num_clusters; 1148 } 1149 out: 1150 return ret; 1151 } 1152 1153 static int ocfs2_xattr_ibody_get(struct inode *inode, 1154 int name_index, 1155 const char *name, 1156 void *buffer, 1157 size_t buffer_size, 1158 struct ocfs2_xattr_search *xs) 1159 { 1160 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1161 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1162 struct ocfs2_xattr_value_root *xv; 1163 size_t size; 1164 int ret = 0; 1165 1166 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 1167 return -ENODATA; 1168 1169 xs->end = (void *)di + inode->i_sb->s_blocksize; 1170 xs->header = (struct ocfs2_xattr_header *) 1171 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 1172 xs->base = (void *)xs->header; 1173 xs->here = xs->header->xh_entries; 1174 1175 ret = ocfs2_xattr_find_entry(name_index, name, xs); 1176 if (ret) 1177 return ret; 1178 size = le64_to_cpu(xs->here->xe_value_size); 1179 if (buffer) { 1180 if (size > buffer_size) 1181 return -ERANGE; 1182 if (ocfs2_xattr_is_local(xs->here)) { 1183 memcpy(buffer, (void *)xs->base + 1184 le16_to_cpu(xs->here->xe_name_offset) + 1185 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); 1186 } else { 1187 xv = (struct ocfs2_xattr_value_root *) 1188 (xs->base + le16_to_cpu( 1189 xs->here->xe_name_offset) + 1190 OCFS2_XATTR_SIZE(xs->here->xe_name_len)); 1191 ret = ocfs2_xattr_get_value_outside(inode, xv, 1192 buffer, size); 1193 if (ret < 0) { 1194 mlog_errno(ret); 1195 return ret; 1196 } 1197 } 1198 } 1199 1200 return size; 1201 } 1202 1203 static int ocfs2_xattr_block_get(struct inode *inode, 1204 int name_index, 1205 const char *name, 1206 void *buffer, 1207 size_t buffer_size, 1208 struct ocfs2_xattr_search *xs) 1209 { 1210 struct ocfs2_xattr_block *xb; 1211 struct ocfs2_xattr_value_root *xv; 1212 size_t size; 1213 
int ret = -ENODATA, name_offset, name_len, i; 1214 int block_off; 1215 1216 xs->bucket = ocfs2_xattr_bucket_new(inode); 1217 if (!xs->bucket) { 1218 ret = -ENOMEM; 1219 mlog_errno(ret); 1220 goto cleanup; 1221 } 1222 1223 ret = ocfs2_xattr_block_find(inode, name_index, name, xs); 1224 if (ret) { 1225 mlog_errno(ret); 1226 goto cleanup; 1227 } 1228 1229 if (xs->not_found) { 1230 ret = -ENODATA; 1231 goto cleanup; 1232 } 1233 1234 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 1235 size = le64_to_cpu(xs->here->xe_value_size); 1236 if (buffer) { 1237 ret = -ERANGE; 1238 if (size > buffer_size) 1239 goto cleanup; 1240 1241 name_offset = le16_to_cpu(xs->here->xe_name_offset); 1242 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); 1243 i = xs->here - xs->header->xh_entries; 1244 1245 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1246 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 1247 bucket_xh(xs->bucket), 1248 i, 1249 &block_off, 1250 &name_offset); 1251 if (ret) { 1252 mlog_errno(ret); 1253 goto cleanup; 1254 } 1255 xs->base = bucket_block(xs->bucket, block_off); 1256 } 1257 if (ocfs2_xattr_is_local(xs->here)) { 1258 memcpy(buffer, (void *)xs->base + 1259 name_offset + name_len, size); 1260 } else { 1261 xv = (struct ocfs2_xattr_value_root *) 1262 (xs->base + name_offset + name_len); 1263 ret = ocfs2_xattr_get_value_outside(inode, xv, 1264 buffer, size); 1265 if (ret < 0) { 1266 mlog_errno(ret); 1267 goto cleanup; 1268 } 1269 } 1270 } 1271 ret = size; 1272 cleanup: 1273 ocfs2_xattr_bucket_free(xs->bucket); 1274 1275 brelse(xs->xattr_bh); 1276 xs->xattr_bh = NULL; 1277 return ret; 1278 } 1279 1280 int ocfs2_xattr_get_nolock(struct inode *inode, 1281 struct buffer_head *di_bh, 1282 int name_index, 1283 const char *name, 1284 void *buffer, 1285 size_t buffer_size) 1286 { 1287 int ret; 1288 struct ocfs2_dinode *di = NULL; 1289 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1290 struct ocfs2_xattr_search xis = { 1291 .not_found = -ENODATA, 1292 }; 
1293 struct ocfs2_xattr_search xbs = { 1294 .not_found = -ENODATA, 1295 }; 1296 1297 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 1298 return -EOPNOTSUPP; 1299 1300 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1301 return -ENODATA; 1302 1303 xis.inode_bh = xbs.inode_bh = di_bh; 1304 di = (struct ocfs2_dinode *)di_bh->b_data; 1305 1306 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 1307 buffer_size, &xis); 1308 if (ret == -ENODATA && di->i_xattr_loc) 1309 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1310 buffer_size, &xbs); 1311 1312 return ret; 1313 } 1314 1315 /* ocfs2_xattr_get() 1316 * 1317 * Copy an extended attribute into the buffer provided. 1318 * Buffer is NULL to compute the size of buffer required. 1319 */ 1320 static int ocfs2_xattr_get(struct inode *inode, 1321 int name_index, 1322 const char *name, 1323 void *buffer, 1324 size_t buffer_size) 1325 { 1326 int ret, had_lock; 1327 struct buffer_head *di_bh = NULL; 1328 struct ocfs2_lock_holder oh; 1329 1330 had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh); 1331 if (had_lock < 0) { 1332 mlog_errno(had_lock); 1333 return had_lock; 1334 } 1335 down_read(&OCFS2_I(inode)->ip_xattr_sem); 1336 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, 1337 name, buffer, buffer_size); 1338 up_read(&OCFS2_I(inode)->ip_xattr_sem); 1339 1340 ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); 1341 1342 brelse(di_bh); 1343 1344 return ret; 1345 } 1346 1347 static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1348 handle_t *handle, 1349 struct ocfs2_xattr_value_buf *vb, 1350 const void *value, 1351 int value_len) 1352 { 1353 int ret = 0, i, cp_len; 1354 u16 blocksize = inode->i_sb->s_blocksize; 1355 u32 p_cluster, num_clusters; 1356 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1357 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1358 u64 blkno; 1359 struct buffer_head *bh = NULL; 1360 unsigned int ext_flags; 1361 struct 
ocfs2_xattr_value_root *xv = vb->vb_xv; 1362 1363 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); 1364 1365 while (cpos < clusters) { 1366 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1367 &num_clusters, &xv->xr_list, 1368 &ext_flags); 1369 if (ret) { 1370 mlog_errno(ret); 1371 goto out; 1372 } 1373 1374 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 1375 1376 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1377 1378 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1379 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1380 &bh, NULL); 1381 if (ret) { 1382 mlog_errno(ret); 1383 goto out; 1384 } 1385 1386 ret = ocfs2_journal_access(handle, 1387 INODE_CACHE(inode), 1388 bh, 1389 OCFS2_JOURNAL_ACCESS_WRITE); 1390 if (ret < 0) { 1391 mlog_errno(ret); 1392 goto out; 1393 } 1394 1395 cp_len = value_len > blocksize ? blocksize : value_len; 1396 memcpy(bh->b_data, value, cp_len); 1397 value_len -= cp_len; 1398 value += cp_len; 1399 if (cp_len < blocksize) 1400 memset(bh->b_data + cp_len, 0, 1401 blocksize - cp_len); 1402 1403 ocfs2_journal_dirty(handle, bh); 1404 brelse(bh); 1405 bh = NULL; 1406 1407 /* 1408 * XXX: do we need to empty all the following 1409 * blocks in this cluster? 
1410 */ 1411 if (!value_len) 1412 break; 1413 } 1414 cpos += num_clusters; 1415 } 1416 out: 1417 brelse(bh); 1418 1419 return ret; 1420 } 1421 1422 static int ocfs2_xa_check_space_helper(int needed_space, int free_start, 1423 int num_entries) 1424 { 1425 int free_space; 1426 1427 if (!needed_space) 1428 return 0; 1429 1430 free_space = free_start - 1431 sizeof(struct ocfs2_xattr_header) - 1432 (num_entries * sizeof(struct ocfs2_xattr_entry)) - 1433 OCFS2_XATTR_HEADER_GAP; 1434 if (free_space < 0) 1435 return -EIO; 1436 if (free_space < needed_space) 1437 return -ENOSPC; 1438 1439 return 0; 1440 } 1441 1442 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, 1443 int type) 1444 { 1445 return loc->xl_ops->xlo_journal_access(handle, loc, type); 1446 } 1447 1448 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) 1449 { 1450 loc->xl_ops->xlo_journal_dirty(handle, loc); 1451 } 1452 1453 /* Give a pointer into the storage for the given offset */ 1454 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) 1455 { 1456 BUG_ON(offset >= loc->xl_size); 1457 return loc->xl_ops->xlo_offset_pointer(loc, offset); 1458 } 1459 1460 /* 1461 * Wipe the name+value pair and allow the storage to reclaim it. This 1462 * must be followed by either removal of the entry or a call to 1463 * ocfs2_xa_add_namevalue(). 1464 */ 1465 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) 1466 { 1467 loc->xl_ops->xlo_wipe_namevalue(loc); 1468 } 1469 1470 /* 1471 * Find lowest offset to a name+value pair. This is the start of our 1472 * downward-growing free space. 1473 */ 1474 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) 1475 { 1476 return loc->xl_ops->xlo_get_free_start(loc); 1477 } 1478 1479 /* Can we reuse loc->xl_entry for xi? 
*/ 1480 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, 1481 struct ocfs2_xattr_info *xi) 1482 { 1483 return loc->xl_ops->xlo_can_reuse(loc, xi); 1484 } 1485 1486 /* How much free space is needed to set the new value */ 1487 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, 1488 struct ocfs2_xattr_info *xi) 1489 { 1490 return loc->xl_ops->xlo_check_space(loc, xi); 1491 } 1492 1493 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1494 { 1495 loc->xl_ops->xlo_add_entry(loc, name_hash); 1496 loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); 1497 /* 1498 * We can't leave the new entry's xe_name_offset at zero or 1499 * add_namevalue() will go nuts. We set it to the size of our 1500 * storage so that it can never be less than any other entry. 1501 */ 1502 loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); 1503 } 1504 1505 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, 1506 struct ocfs2_xattr_info *xi) 1507 { 1508 int size = namevalue_size_xi(xi); 1509 int nameval_offset; 1510 char *nameval_buf; 1511 1512 loc->xl_ops->xlo_add_namevalue(loc, size); 1513 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1514 loc->xl_entry->xe_name_len = xi->xi_name_len; 1515 ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); 1516 ocfs2_xattr_set_local(loc->xl_entry, 1517 xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); 1518 1519 nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1520 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 1521 memset(nameval_buf, 0, size); 1522 memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); 1523 } 1524 1525 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, 1526 struct ocfs2_xattr_value_buf *vb) 1527 { 1528 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1529 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1530 1531 /* Value bufs are for value trees */ 1532 BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); 1533 
BUG_ON(namevalue_size_xe(loc->xl_entry) != 1534 (name_size + OCFS2_XATTR_ROOT_SIZE)); 1535 1536 loc->xl_ops->xlo_fill_value_buf(loc, vb); 1537 vb->vb_xv = 1538 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, 1539 nameval_offset + 1540 name_size); 1541 } 1542 1543 static int ocfs2_xa_block_journal_access(handle_t *handle, 1544 struct ocfs2_xa_loc *loc, int type) 1545 { 1546 struct buffer_head *bh = loc->xl_storage; 1547 ocfs2_journal_access_func access; 1548 1549 if (loc->xl_size == (bh->b_size - 1550 offsetof(struct ocfs2_xattr_block, 1551 xb_attrs.xb_header))) 1552 access = ocfs2_journal_access_xb; 1553 else 1554 access = ocfs2_journal_access_di; 1555 return access(handle, INODE_CACHE(loc->xl_inode), bh, type); 1556 } 1557 1558 static void ocfs2_xa_block_journal_dirty(handle_t *handle, 1559 struct ocfs2_xa_loc *loc) 1560 { 1561 struct buffer_head *bh = loc->xl_storage; 1562 1563 ocfs2_journal_dirty(handle, bh); 1564 } 1565 1566 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, 1567 int offset) 1568 { 1569 return (char *)loc->xl_header + offset; 1570 } 1571 1572 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, 1573 struct ocfs2_xattr_info *xi) 1574 { 1575 /* 1576 * Block storage is strict. If the sizes aren't exact, we will 1577 * remove the old one and reinsert the new. 
1578 */ 1579 return namevalue_size_xe(loc->xl_entry) == 1580 namevalue_size_xi(xi); 1581 } 1582 1583 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) 1584 { 1585 struct ocfs2_xattr_header *xh = loc->xl_header; 1586 int i, count = le16_to_cpu(xh->xh_count); 1587 int offset, free_start = loc->xl_size; 1588 1589 for (i = 0; i < count; i++) { 1590 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1591 if (offset < free_start) 1592 free_start = offset; 1593 } 1594 1595 return free_start; 1596 } 1597 1598 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, 1599 struct ocfs2_xattr_info *xi) 1600 { 1601 int count = le16_to_cpu(loc->xl_header->xh_count); 1602 int free_start = ocfs2_xa_get_free_start(loc); 1603 int needed_space = ocfs2_xi_entry_usage(xi); 1604 1605 /* 1606 * Block storage will reclaim the original entry before inserting 1607 * the new value, so we only need the difference. If the new 1608 * entry is smaller than the old one, we don't need anything. 1609 */ 1610 if (loc->xl_entry) { 1611 /* Don't need space if we're reusing! */ 1612 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1613 needed_space = 0; 1614 else 1615 needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); 1616 } 1617 if (needed_space < 0) 1618 needed_space = 0; 1619 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1620 } 1621 1622 /* 1623 * Block storage for xattrs keeps the name+value pairs compacted. When 1624 * we remove one, we have to shift any that preceded it towards the end. 
1625 */ 1626 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) 1627 { 1628 int i, offset; 1629 int namevalue_offset, first_namevalue_offset, namevalue_size; 1630 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1631 struct ocfs2_xattr_header *xh = loc->xl_header; 1632 int count = le16_to_cpu(xh->xh_count); 1633 1634 namevalue_offset = le16_to_cpu(entry->xe_name_offset); 1635 namevalue_size = namevalue_size_xe(entry); 1636 first_namevalue_offset = ocfs2_xa_get_free_start(loc); 1637 1638 /* Shift the name+value pairs */ 1639 memmove((char *)xh + first_namevalue_offset + namevalue_size, 1640 (char *)xh + first_namevalue_offset, 1641 namevalue_offset - first_namevalue_offset); 1642 memset((char *)xh + first_namevalue_offset, 0, namevalue_size); 1643 1644 /* Now tell xh->xh_entries about it */ 1645 for (i = 0; i < count; i++) { 1646 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1647 if (offset <= namevalue_offset) 1648 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1649 namevalue_size); 1650 } 1651 1652 /* 1653 * Note that we don't update xh_free_start or xh_name_value_len 1654 * because they're not used in block-stored xattrs. 
1655 */ 1656 } 1657 1658 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1659 { 1660 int count = le16_to_cpu(loc->xl_header->xh_count); 1661 loc->xl_entry = &(loc->xl_header->xh_entries[count]); 1662 le16_add_cpu(&loc->xl_header->xh_count, 1); 1663 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1664 } 1665 1666 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1667 { 1668 int free_start = ocfs2_xa_get_free_start(loc); 1669 1670 loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); 1671 } 1672 1673 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, 1674 struct ocfs2_xattr_value_buf *vb) 1675 { 1676 struct buffer_head *bh = loc->xl_storage; 1677 1678 if (loc->xl_size == (bh->b_size - 1679 offsetof(struct ocfs2_xattr_block, 1680 xb_attrs.xb_header))) 1681 vb->vb_access = ocfs2_journal_access_xb; 1682 else 1683 vb->vb_access = ocfs2_journal_access_di; 1684 vb->vb_bh = bh; 1685 } 1686 1687 /* 1688 * Operations for xattrs stored in blocks. This includes inline inode 1689 * storage and unindexed ocfs2_xattr_blocks. 
1690 */ 1691 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { 1692 .xlo_journal_access = ocfs2_xa_block_journal_access, 1693 .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, 1694 .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, 1695 .xlo_check_space = ocfs2_xa_block_check_space, 1696 .xlo_can_reuse = ocfs2_xa_block_can_reuse, 1697 .xlo_get_free_start = ocfs2_xa_block_get_free_start, 1698 .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, 1699 .xlo_add_entry = ocfs2_xa_block_add_entry, 1700 .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, 1701 .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, 1702 }; 1703 1704 static int ocfs2_xa_bucket_journal_access(handle_t *handle, 1705 struct ocfs2_xa_loc *loc, int type) 1706 { 1707 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1708 1709 return ocfs2_xattr_bucket_journal_access(handle, bucket, type); 1710 } 1711 1712 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, 1713 struct ocfs2_xa_loc *loc) 1714 { 1715 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1716 1717 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 1718 } 1719 1720 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, 1721 int offset) 1722 { 1723 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1724 int block, block_offset; 1725 1726 /* The header is at the front of the bucket */ 1727 block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; 1728 block_offset = offset % loc->xl_inode->i_sb->s_blocksize; 1729 1730 return bucket_block(bucket, block) + block_offset; 1731 } 1732 1733 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, 1734 struct ocfs2_xattr_info *xi) 1735 { 1736 return namevalue_size_xe(loc->xl_entry) >= 1737 namevalue_size_xi(xi); 1738 } 1739 1740 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) 1741 { 1742 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1743 return le16_to_cpu(bucket_xh(bucket)->xh_free_start); 1744 } 1745 1746 static 
int ocfs2_bucket_align_free_start(struct super_block *sb, 1747 int free_start, int size) 1748 { 1749 /* 1750 * We need to make sure that the name+value pair fits within 1751 * one block. 1752 */ 1753 if (((free_start - size) >> sb->s_blocksize_bits) != 1754 ((free_start - 1) >> sb->s_blocksize_bits)) 1755 free_start -= free_start % sb->s_blocksize; 1756 1757 return free_start; 1758 } 1759 1760 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, 1761 struct ocfs2_xattr_info *xi) 1762 { 1763 int rc; 1764 int count = le16_to_cpu(loc->xl_header->xh_count); 1765 int free_start = ocfs2_xa_get_free_start(loc); 1766 int needed_space = ocfs2_xi_entry_usage(xi); 1767 int size = namevalue_size_xi(xi); 1768 struct super_block *sb = loc->xl_inode->i_sb; 1769 1770 /* 1771 * Bucket storage does not reclaim name+value pairs it cannot 1772 * reuse. They live as holes until the bucket fills, and then 1773 * the bucket is defragmented. However, the bucket can reclaim 1774 * the ocfs2_xattr_entry. 1775 */ 1776 if (loc->xl_entry) { 1777 /* Don't need space if we're reusing! */ 1778 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1779 needed_space = 0; 1780 else 1781 needed_space -= sizeof(struct ocfs2_xattr_entry); 1782 } 1783 BUG_ON(needed_space < 0); 1784 1785 if (free_start < size) { 1786 if (needed_space) 1787 return -ENOSPC; 1788 } else { 1789 /* 1790 * First we check if it would fit in the first place. 1791 * Below, we align the free start to a block. This may 1792 * slide us below the minimum gap. By checking unaligned 1793 * first, we avoid that error. 
1794 */ 1795 rc = ocfs2_xa_check_space_helper(needed_space, free_start, 1796 count); 1797 if (rc) 1798 return rc; 1799 free_start = ocfs2_bucket_align_free_start(sb, free_start, 1800 size); 1801 } 1802 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1803 } 1804 1805 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) 1806 { 1807 le16_add_cpu(&loc->xl_header->xh_name_value_len, 1808 -namevalue_size_xe(loc->xl_entry)); 1809 } 1810 1811 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1812 { 1813 struct ocfs2_xattr_header *xh = loc->xl_header; 1814 int count = le16_to_cpu(xh->xh_count); 1815 int low = 0, high = count - 1, tmp; 1816 struct ocfs2_xattr_entry *tmp_xe; 1817 1818 /* 1819 * We keep buckets sorted by name_hash, so we need to find 1820 * our insert place. 1821 */ 1822 while (low <= high && count) { 1823 tmp = (low + high) / 2; 1824 tmp_xe = &xh->xh_entries[tmp]; 1825 1826 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 1827 low = tmp + 1; 1828 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) 1829 high = tmp - 1; 1830 else { 1831 low = tmp; 1832 break; 1833 } 1834 } 1835 1836 if (low != count) 1837 memmove(&xh->xh_entries[low + 1], 1838 &xh->xh_entries[low], 1839 ((count - low) * sizeof(struct ocfs2_xattr_entry))); 1840 1841 le16_add_cpu(&xh->xh_count, 1); 1842 loc->xl_entry = &xh->xh_entries[low]; 1843 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1844 } 1845 1846 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1847 { 1848 int free_start = ocfs2_xa_get_free_start(loc); 1849 struct ocfs2_xattr_header *xh = loc->xl_header; 1850 struct super_block *sb = loc->xl_inode->i_sb; 1851 int nameval_offset; 1852 1853 free_start = ocfs2_bucket_align_free_start(sb, free_start, size); 1854 nameval_offset = free_start - size; 1855 loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); 1856 xh->xh_free_start = cpu_to_le16(nameval_offset); 1857 
le16_add_cpu(&xh->xh_name_value_len, size); 1858 1859 } 1860 1861 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, 1862 struct ocfs2_xattr_value_buf *vb) 1863 { 1864 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1865 struct super_block *sb = loc->xl_inode->i_sb; 1866 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1867 int size = namevalue_size_xe(loc->xl_entry); 1868 int block_offset = nameval_offset >> sb->s_blocksize_bits; 1869 1870 /* Values are not allowed to straddle block boundaries */ 1871 BUG_ON(block_offset != 1872 ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); 1873 /* We expect the bucket to be filled in */ 1874 BUG_ON(!bucket->bu_bhs[block_offset]); 1875 1876 vb->vb_access = ocfs2_journal_access; 1877 vb->vb_bh = bucket->bu_bhs[block_offset]; 1878 } 1879 1880 /* Operations for xattrs stored in buckets. */ 1881 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { 1882 .xlo_journal_access = ocfs2_xa_bucket_journal_access, 1883 .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, 1884 .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, 1885 .xlo_check_space = ocfs2_xa_bucket_check_space, 1886 .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, 1887 .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, 1888 .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, 1889 .xlo_add_entry = ocfs2_xa_bucket_add_entry, 1890 .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, 1891 .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, 1892 }; 1893 1894 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) 1895 { 1896 struct ocfs2_xattr_value_buf vb; 1897 1898 if (ocfs2_xattr_is_local(loc->xl_entry)) 1899 return 0; 1900 1901 ocfs2_xa_fill_value_buf(loc, &vb); 1902 return le32_to_cpu(vb.vb_xv->xr_clusters); 1903 } 1904 1905 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, 1906 struct ocfs2_xattr_set_ctxt *ctxt) 1907 { 1908 int trunc_rc, access_rc; 1909 struct 
ocfs2_xattr_value_buf vb; 1910 1911 ocfs2_xa_fill_value_buf(loc, &vb); 1912 trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1913 ctxt); 1914 1915 /* 1916 * The caller of ocfs2_xa_value_truncate() has already called 1917 * ocfs2_xa_journal_access on the loc. However, The truncate code 1918 * calls ocfs2_extend_trans(). This may commit the previous 1919 * transaction and open a new one. If this is a bucket, truncate 1920 * could leave only vb->vb_bh set up for journaling. Meanwhile, 1921 * the caller is expecting to dirty the entire bucket. So we must 1922 * reset the journal work. We do this even if truncate has failed, 1923 * as it could have failed after committing the extend. 1924 */ 1925 access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1926 OCFS2_JOURNAL_ACCESS_WRITE); 1927 1928 /* Errors in truncate take precedence */ 1929 return trunc_rc ? trunc_rc : access_rc; 1930 } 1931 1932 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1933 { 1934 int index, count; 1935 struct ocfs2_xattr_header *xh = loc->xl_header; 1936 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1937 1938 ocfs2_xa_wipe_namevalue(loc); 1939 loc->xl_entry = NULL; 1940 1941 le16_add_cpu(&xh->xh_count, -1); 1942 count = le16_to_cpu(xh->xh_count); 1943 1944 /* 1945 * Only zero out the entry if there are more remaining. This is 1946 * important for an empty bucket, as it keeps track of the 1947 * bucket's hash value. It doesn't hurt empty block storage. 
1948 */ 1949 if (count) { 1950 index = ((char *)entry - (char *)&xh->xh_entries) / 1951 sizeof(struct ocfs2_xattr_entry); 1952 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1953 (count - index) * sizeof(struct ocfs2_xattr_entry)); 1954 memset(&xh->xh_entries[count], 0, 1955 sizeof(struct ocfs2_xattr_entry)); 1956 } 1957 } 1958 1959 /* 1960 * If we have a problem adjusting the size of an external value during 1961 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 1962 * in an intermediate state. For example, the value may be partially 1963 * truncated. 1964 * 1965 * If the value tree hasn't changed, the extend/truncate went nowhere. 1966 * We have nothing to do. The caller can treat it as a straight error. 1967 * 1968 * If the value tree got partially truncated, we now have a corrupted 1969 * extended attribute. We're going to wipe its entry and leak the 1970 * clusters. Better to leak some storage than leave a corrupt entry. 1971 * 1972 * If the value tree grew, it obviously didn't grow enough for the 1973 * new entry. We're not going to try and reclaim those clusters either. 1974 * If there was already an external value there (orig_clusters != 0), 1975 * the new clusters are attached safely and we can just leave the old 1976 * value in place. If there was no external value there, we remove 1977 * the entry. 1978 * 1979 * This way, the xattr block we store in the journal will be consistent. 1980 * If the size change broke because of the journal, no changes will hit 1981 * disk anyway. 1982 */ 1983 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 1984 const char *what, 1985 unsigned int orig_clusters) 1986 { 1987 unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 1988 char *nameval_buf = ocfs2_xa_offset_pointer(loc, 1989 le16_to_cpu(loc->xl_entry->xe_name_offset)); 1990 1991 if (new_clusters < orig_clusters) { 1992 mlog(ML_ERROR, 1993 "Partial truncate while %s xattr %.*s. 
Leaking " 1994 "%u clusters and removing the entry\n", 1995 what, loc->xl_entry->xe_name_len, nameval_buf, 1996 orig_clusters - new_clusters); 1997 ocfs2_xa_remove_entry(loc); 1998 } else if (!orig_clusters) { 1999 mlog(ML_ERROR, 2000 "Unable to allocate an external value for xattr " 2001 "%.*s safely. Leaking %u clusters and removing the " 2002 "entry\n", 2003 loc->xl_entry->xe_name_len, nameval_buf, 2004 new_clusters - orig_clusters); 2005 ocfs2_xa_remove_entry(loc); 2006 } else if (new_clusters > orig_clusters) 2007 mlog(ML_ERROR, 2008 "Unable to grow xattr %.*s safely. %u new clusters " 2009 "have been added, but the value will not be " 2010 "modified\n", 2011 loc->xl_entry->xe_name_len, nameval_buf, 2012 new_clusters - orig_clusters); 2013 } 2014 2015 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 2016 struct ocfs2_xattr_set_ctxt *ctxt) 2017 { 2018 int rc = 0; 2019 unsigned int orig_clusters; 2020 2021 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2022 orig_clusters = ocfs2_xa_value_clusters(loc); 2023 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2024 if (rc) { 2025 mlog_errno(rc); 2026 /* 2027 * Since this is remove, we can return 0 if 2028 * ocfs2_xa_cleanup_value_truncate() is going to 2029 * wipe the entry anyway. So we check the 2030 * cluster count as well. 2031 */ 2032 if (orig_clusters != ocfs2_xa_value_clusters(loc)) 2033 rc = 0; 2034 ocfs2_xa_cleanup_value_truncate(loc, "removing", 2035 orig_clusters); 2036 if (rc) 2037 goto out; 2038 } 2039 } 2040 2041 ocfs2_xa_remove_entry(loc); 2042 2043 out: 2044 return rc; 2045 } 2046 2047 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 2048 { 2049 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 2050 char *nameval_buf; 2051 2052 nameval_buf = ocfs2_xa_offset_pointer(loc, 2053 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2054 memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 2055 } 2056 2057 /* 2058 * Take an existing entry and make it ready for the new value. 
This 2059 * won't allocate space, but it may free space. It should be ready for 2060 * ocfs2_xa_prepare_entry() to finish the work. 2061 */ 2062 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc, 2063 struct ocfs2_xattr_info *xi, 2064 struct ocfs2_xattr_set_ctxt *ctxt) 2065 { 2066 int rc = 0; 2067 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2068 unsigned int orig_clusters; 2069 char *nameval_buf; 2070 int xe_local = ocfs2_xattr_is_local(loc->xl_entry); 2071 int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE; 2072 2073 BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) != 2074 name_size); 2075 2076 nameval_buf = ocfs2_xa_offset_pointer(loc, 2077 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2078 if (xe_local) { 2079 memset(nameval_buf + name_size, 0, 2080 namevalue_size_xe(loc->xl_entry) - name_size); 2081 if (!xi_local) 2082 ocfs2_xa_install_value_root(loc); 2083 } else { 2084 orig_clusters = ocfs2_xa_value_clusters(loc); 2085 if (xi_local) { 2086 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2087 if (rc < 0) 2088 mlog_errno(rc); 2089 else 2090 memset(nameval_buf + name_size, 0, 2091 namevalue_size_xe(loc->xl_entry) - 2092 name_size); 2093 } else if (le64_to_cpu(loc->xl_entry->xe_value_size) > 2094 xi->xi_value_len) { 2095 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, 2096 ctxt); 2097 if (rc < 0) 2098 mlog_errno(rc); 2099 } 2100 2101 if (rc) { 2102 ocfs2_xa_cleanup_value_truncate(loc, "reusing", 2103 orig_clusters); 2104 goto out; 2105 } 2106 } 2107 2108 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 2109 ocfs2_xattr_set_local(loc->xl_entry, xi_local); 2110 2111 out: 2112 return rc; 2113 } 2114 2115 /* 2116 * Prepares loc->xl_entry to receive the new xattr. This includes 2117 * properly setting up the name+value pair region. If loc->xl_entry 2118 * already exists, it will take care of modifying it appropriately. 2119 * 2120 * Note that this modifies the data. You did journal_access already, 2121 * right? 
2122 */ 2123 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, 2124 struct ocfs2_xattr_info *xi, 2125 u32 name_hash, 2126 struct ocfs2_xattr_set_ctxt *ctxt) 2127 { 2128 int rc = 0; 2129 unsigned int orig_clusters; 2130 __le64 orig_value_size = 0; 2131 2132 rc = ocfs2_xa_check_space(loc, xi); 2133 if (rc) 2134 goto out; 2135 2136 if (loc->xl_entry) { 2137 if (ocfs2_xa_can_reuse_entry(loc, xi)) { 2138 orig_value_size = loc->xl_entry->xe_value_size; 2139 rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); 2140 if (rc) 2141 goto out; 2142 goto alloc_value; 2143 } 2144 2145 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2146 orig_clusters = ocfs2_xa_value_clusters(loc); 2147 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2148 if (rc) { 2149 mlog_errno(rc); 2150 ocfs2_xa_cleanup_value_truncate(loc, 2151 "overwriting", 2152 orig_clusters); 2153 goto out; 2154 } 2155 } 2156 ocfs2_xa_wipe_namevalue(loc); 2157 } else 2158 ocfs2_xa_add_entry(loc, name_hash); 2159 2160 /* 2161 * If we get here, we have a blank entry. Fill it. We grow our 2162 * name+value pair back from the end. 2163 */ 2164 ocfs2_xa_add_namevalue(loc, xi); 2165 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 2166 ocfs2_xa_install_value_root(loc); 2167 2168 alloc_value: 2169 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2170 orig_clusters = ocfs2_xa_value_clusters(loc); 2171 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); 2172 if (rc < 0) { 2173 ctxt->set_abort = 1; 2174 ocfs2_xa_cleanup_value_truncate(loc, "growing", 2175 orig_clusters); 2176 /* 2177 * If we were growing an existing value, 2178 * ocfs2_xa_cleanup_value_truncate() won't remove 2179 * the entry. We need to restore the original value 2180 * size. 2181 */ 2182 if (loc->xl_entry) { 2183 BUG_ON(!orig_value_size); 2184 loc->xl_entry->xe_value_size = orig_value_size; 2185 } 2186 mlog_errno(rc); 2187 } 2188 } 2189 2190 out: 2191 return rc; 2192 } 2193 2194 /* 2195 * Store the value portion of the name+value pair. 
This will skip 2196 * values that are stored externally. Their tree roots were set up 2197 * by ocfs2_xa_prepare_entry(). 2198 */ 2199 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, 2200 struct ocfs2_xattr_info *xi, 2201 struct ocfs2_xattr_set_ctxt *ctxt) 2202 { 2203 int rc = 0; 2204 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 2205 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2206 char *nameval_buf; 2207 struct ocfs2_xattr_value_buf vb; 2208 2209 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 2210 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2211 ocfs2_xa_fill_value_buf(loc, &vb); 2212 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, 2213 ctxt->handle, &vb, 2214 xi->xi_value, 2215 xi->xi_value_len); 2216 } else 2217 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); 2218 2219 return rc; 2220 } 2221 2222 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, 2223 struct ocfs2_xattr_info *xi, 2224 struct ocfs2_xattr_set_ctxt *ctxt) 2225 { 2226 int ret; 2227 u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, 2228 xi->xi_name_len); 2229 2230 ret = ocfs2_xa_journal_access(ctxt->handle, loc, 2231 OCFS2_JOURNAL_ACCESS_WRITE); 2232 if (ret) { 2233 mlog_errno(ret); 2234 goto out; 2235 } 2236 2237 /* 2238 * From here on out, everything is going to modify the buffer a 2239 * little. Errors are going to leave the xattr header in a 2240 * sane state. Thus, even with errors we dirty the sucker. 
2241 */ 2242 2243 /* Don't worry, we are never called with !xi_value and !xl_entry */ 2244 if (!xi->xi_value) { 2245 ret = ocfs2_xa_remove(loc, ctxt); 2246 goto out_dirty; 2247 } 2248 2249 ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); 2250 if (ret) { 2251 if (ret != -ENOSPC) 2252 mlog_errno(ret); 2253 goto out_dirty; 2254 } 2255 2256 ret = ocfs2_xa_store_value(loc, xi, ctxt); 2257 if (ret) 2258 mlog_errno(ret); 2259 2260 out_dirty: 2261 ocfs2_xa_journal_dirty(ctxt->handle, loc); 2262 2263 out: 2264 return ret; 2265 } 2266 2267 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, 2268 struct inode *inode, 2269 struct buffer_head *bh, 2270 struct ocfs2_xattr_entry *entry) 2271 { 2272 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2273 2274 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); 2275 2276 loc->xl_inode = inode; 2277 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2278 loc->xl_storage = bh; 2279 loc->xl_entry = entry; 2280 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); 2281 loc->xl_header = 2282 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - 2283 loc->xl_size); 2284 } 2285 2286 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, 2287 struct inode *inode, 2288 struct buffer_head *bh, 2289 struct ocfs2_xattr_entry *entry) 2290 { 2291 struct ocfs2_xattr_block *xb = 2292 (struct ocfs2_xattr_block *)bh->b_data; 2293 2294 BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); 2295 2296 loc->xl_inode = inode; 2297 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2298 loc->xl_storage = bh; 2299 loc->xl_header = &(xb->xb_attrs.xb_header); 2300 loc->xl_entry = entry; 2301 loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, 2302 xb_attrs.xb_header); 2303 } 2304 2305 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, 2306 struct ocfs2_xattr_bucket *bucket, 2307 struct ocfs2_xattr_entry *entry) 2308 { 2309 loc->xl_inode = bucket->bu_inode; 2310 loc->xl_ops = 
&ocfs2_xa_bucket_loc_ops; 2311 loc->xl_storage = bucket; 2312 loc->xl_header = bucket_xh(bucket); 2313 loc->xl_entry = entry; 2314 loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; 2315 } 2316 2317 /* 2318 * In xattr remove, if it is stored outside and refcounted, we may have 2319 * the chance to split the refcount tree. So need the allocators. 2320 */ 2321 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, 2322 struct ocfs2_xattr_value_root *xv, 2323 struct ocfs2_caching_info *ref_ci, 2324 struct buffer_head *ref_root_bh, 2325 struct ocfs2_alloc_context **meta_ac, 2326 int *ref_credits) 2327 { 2328 int ret, meta_add = 0; 2329 u32 p_cluster, num_clusters; 2330 unsigned int ext_flags; 2331 2332 *ref_credits = 0; 2333 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 2334 &num_clusters, 2335 &xv->xr_list, 2336 &ext_flags); 2337 if (ret) { 2338 mlog_errno(ret); 2339 goto out; 2340 } 2341 2342 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 2343 goto out; 2344 2345 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, 2346 ref_root_bh, xv, 2347 &meta_add, ref_credits); 2348 if (ret) { 2349 mlog_errno(ret); 2350 goto out; 2351 } 2352 2353 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2354 meta_add, meta_ac); 2355 if (ret) 2356 mlog_errno(ret); 2357 2358 out: 2359 return ret; 2360 } 2361 2362 static int ocfs2_remove_value_outside(struct inode*inode, 2363 struct ocfs2_xattr_value_buf *vb, 2364 struct ocfs2_xattr_header *header, 2365 struct ocfs2_caching_info *ref_ci, 2366 struct buffer_head *ref_root_bh) 2367 { 2368 int ret = 0, i, ref_credits; 2369 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2370 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2371 void *val; 2372 2373 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 2374 2375 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 2376 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 2377 2378 if (ocfs2_xattr_is_local(entry)) 2379 continue; 2380 2381 val = (void *)header + 2382 
le16_to_cpu(entry->xe_name_offset); 2383 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2384 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2385 2386 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2387 ref_ci, ref_root_bh, 2388 &ctxt.meta_ac, 2389 &ref_credits); 2390 2391 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2392 ocfs2_remove_extent_credits(osb->sb)); 2393 if (IS_ERR(ctxt.handle)) { 2394 ret = PTR_ERR(ctxt.handle); 2395 mlog_errno(ret); 2396 break; 2397 } 2398 2399 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2400 2401 ocfs2_commit_trans(osb, ctxt.handle); 2402 if (ctxt.meta_ac) { 2403 ocfs2_free_alloc_context(ctxt.meta_ac); 2404 ctxt.meta_ac = NULL; 2405 } 2406 2407 if (ret < 0) { 2408 mlog_errno(ret); 2409 break; 2410 } 2411 2412 } 2413 2414 if (ctxt.meta_ac) 2415 ocfs2_free_alloc_context(ctxt.meta_ac); 2416 ocfs2_schedule_truncate_log_flush(osb, 1); 2417 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2418 return ret; 2419 } 2420 2421 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2422 struct buffer_head *di_bh, 2423 struct ocfs2_caching_info *ref_ci, 2424 struct buffer_head *ref_root_bh) 2425 { 2426 2427 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2428 struct ocfs2_xattr_header *header; 2429 int ret; 2430 struct ocfs2_xattr_value_buf vb = { 2431 .vb_bh = di_bh, 2432 .vb_access = ocfs2_journal_access_di, 2433 }; 2434 2435 header = (struct ocfs2_xattr_header *) 2436 ((void *)di + inode->i_sb->s_blocksize - 2437 le16_to_cpu(di->i_xattr_inline_size)); 2438 2439 ret = ocfs2_remove_value_outside(inode, &vb, header, 2440 ref_ci, ref_root_bh); 2441 2442 return ret; 2443 } 2444 2445 struct ocfs2_rm_xattr_bucket_para { 2446 struct ocfs2_caching_info *ref_ci; 2447 struct buffer_head *ref_root_bh; 2448 }; 2449 2450 static int ocfs2_xattr_block_remove(struct inode *inode, 2451 struct buffer_head *blk_bh, 2452 struct ocfs2_caching_info *ref_ci, 2453 struct buffer_head *ref_root_bh) 2454 { 2455 struct ocfs2_xattr_block 
*xb; 2456 int ret = 0; 2457 struct ocfs2_xattr_value_buf vb = { 2458 .vb_bh = blk_bh, 2459 .vb_access = ocfs2_journal_access_xb, 2460 }; 2461 struct ocfs2_rm_xattr_bucket_para args = { 2462 .ref_ci = ref_ci, 2463 .ref_root_bh = ref_root_bh, 2464 }; 2465 2466 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2467 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2468 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 2469 ret = ocfs2_remove_value_outside(inode, &vb, header, 2470 ref_ci, ref_root_bh); 2471 } else 2472 ret = ocfs2_iterate_xattr_index_block(inode, 2473 blk_bh, 2474 ocfs2_rm_xattr_cluster, 2475 &args); 2476 2477 return ret; 2478 } 2479 2480 static int ocfs2_xattr_free_block(struct inode *inode, 2481 u64 block, 2482 struct ocfs2_caching_info *ref_ci, 2483 struct buffer_head *ref_root_bh) 2484 { 2485 struct inode *xb_alloc_inode; 2486 struct buffer_head *xb_alloc_bh = NULL; 2487 struct buffer_head *blk_bh = NULL; 2488 struct ocfs2_xattr_block *xb; 2489 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2490 handle_t *handle; 2491 int ret = 0; 2492 u64 blk, bg_blkno; 2493 u16 bit; 2494 2495 ret = ocfs2_read_xattr_block(inode, block, &blk_bh); 2496 if (ret < 0) { 2497 mlog_errno(ret); 2498 goto out; 2499 } 2500 2501 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); 2502 if (ret < 0) { 2503 mlog_errno(ret); 2504 goto out; 2505 } 2506 2507 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2508 blk = le64_to_cpu(xb->xb_blkno); 2509 bit = le16_to_cpu(xb->xb_suballoc_bit); 2510 if (xb->xb_suballoc_loc) 2511 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); 2512 else 2513 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2514 2515 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2516 EXTENT_ALLOC_SYSTEM_INODE, 2517 le16_to_cpu(xb->xb_suballoc_slot)); 2518 if (!xb_alloc_inode) { 2519 ret = -ENOMEM; 2520 mlog_errno(ret); 2521 goto out; 2522 } 2523 inode_lock(xb_alloc_inode); 2524 2525 ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 
1); 2526 if (ret < 0) { 2527 mlog_errno(ret); 2528 goto out_mutex; 2529 } 2530 2531 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 2532 if (IS_ERR(handle)) { 2533 ret = PTR_ERR(handle); 2534 mlog_errno(ret); 2535 goto out_unlock; 2536 } 2537 2538 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, 2539 bit, bg_blkno, 1); 2540 if (ret < 0) 2541 mlog_errno(ret); 2542 2543 ocfs2_commit_trans(osb, handle); 2544 out_unlock: 2545 ocfs2_inode_unlock(xb_alloc_inode, 1); 2546 brelse(xb_alloc_bh); 2547 out_mutex: 2548 inode_unlock(xb_alloc_inode); 2549 iput(xb_alloc_inode); 2550 out: 2551 brelse(blk_bh); 2552 return ret; 2553 } 2554 2555 /* 2556 * ocfs2_xattr_remove() 2557 * 2558 * Free extended attribute resources associated with this inode. 2559 */ 2560 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) 2561 { 2562 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2563 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2564 struct ocfs2_refcount_tree *ref_tree = NULL; 2565 struct buffer_head *ref_root_bh = NULL; 2566 struct ocfs2_caching_info *ref_ci = NULL; 2567 handle_t *handle; 2568 int ret; 2569 2570 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2571 return 0; 2572 2573 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2574 return 0; 2575 2576 if (ocfs2_is_refcount_inode(inode)) { 2577 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), 2578 le64_to_cpu(di->i_refcount_loc), 2579 1, &ref_tree, &ref_root_bh); 2580 if (ret) { 2581 mlog_errno(ret); 2582 goto out; 2583 } 2584 ref_ci = &ref_tree->rf_ci; 2585 2586 } 2587 2588 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2589 ret = ocfs2_xattr_ibody_remove(inode, di_bh, 2590 ref_ci, ref_root_bh); 2591 if (ret < 0) { 2592 mlog_errno(ret); 2593 goto out; 2594 } 2595 } 2596 2597 if (di->i_xattr_loc) { 2598 ret = ocfs2_xattr_free_block(inode, 2599 le64_to_cpu(di->i_xattr_loc), 2600 ref_ci, ref_root_bh); 2601 if (ret < 0) { 2602 mlog_errno(ret); 2603 goto out; 2604 } 
2605 } 2606 2607 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 2608 OCFS2_INODE_UPDATE_CREDITS); 2609 if (IS_ERR(handle)) { 2610 ret = PTR_ERR(handle); 2611 mlog_errno(ret); 2612 goto out; 2613 } 2614 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 2615 OCFS2_JOURNAL_ACCESS_WRITE); 2616 if (ret) { 2617 mlog_errno(ret); 2618 goto out_commit; 2619 } 2620 2621 di->i_xattr_loc = 0; 2622 2623 spin_lock(&oi->ip_lock); 2624 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2625 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2626 spin_unlock(&oi->ip_lock); 2627 ocfs2_update_inode_fsync_trans(handle, inode, 0); 2628 2629 ocfs2_journal_dirty(handle, di_bh); 2630 out_commit: 2631 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2632 out: 2633 if (ref_tree) 2634 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); 2635 brelse(ref_root_bh); 2636 return ret; 2637 } 2638 2639 static int ocfs2_xattr_has_space_inline(struct inode *inode, 2640 struct ocfs2_dinode *di) 2641 { 2642 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2643 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; 2644 int free; 2645 2646 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) 2647 return 0; 2648 2649 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2650 struct ocfs2_inline_data *idata = &di->id2.i_data; 2651 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); 2652 } else if (ocfs2_inode_is_fast_symlink(inode)) { 2653 free = ocfs2_fast_symlink_chars(inode->i_sb) - 2654 le64_to_cpu(di->i_size); 2655 } else { 2656 struct ocfs2_extent_list *el = &di->id2.i_list; 2657 free = (le16_to_cpu(el->l_count) - 2658 le16_to_cpu(el->l_next_free_rec)) * 2659 sizeof(struct ocfs2_extent_rec); 2660 } 2661 if (free >= xattrsize) 2662 return 1; 2663 2664 return 0; 2665 } 2666 2667 /* 2668 * ocfs2_xattr_ibody_find() 2669 * 2670 * Find extended attribute in inode block and 2671 * fill search info into struct ocfs2_xattr_search. 
2672 */ 2673 static int ocfs2_xattr_ibody_find(struct inode *inode, 2674 int name_index, 2675 const char *name, 2676 struct ocfs2_xattr_search *xs) 2677 { 2678 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2679 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2680 int ret; 2681 int has_space = 0; 2682 2683 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2684 return 0; 2685 2686 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2687 down_read(&oi->ip_alloc_sem); 2688 has_space = ocfs2_xattr_has_space_inline(inode, di); 2689 up_read(&oi->ip_alloc_sem); 2690 if (!has_space) 2691 return 0; 2692 } 2693 2694 xs->xattr_bh = xs->inode_bh; 2695 xs->end = (void *)di + inode->i_sb->s_blocksize; 2696 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) 2697 xs->header = (struct ocfs2_xattr_header *) 2698 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 2699 else 2700 xs->header = (struct ocfs2_xattr_header *) 2701 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); 2702 xs->base = (void *)xs->header; 2703 xs->here = xs->header->xh_entries; 2704 2705 /* Find the named attribute. 
*/ 2706 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2707 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2708 if (ret && ret != -ENODATA) 2709 return ret; 2710 xs->not_found = ret; 2711 } 2712 2713 return 0; 2714 } 2715 2716 static int ocfs2_xattr_ibody_init(struct inode *inode, 2717 struct buffer_head *di_bh, 2718 struct ocfs2_xattr_set_ctxt *ctxt) 2719 { 2720 int ret; 2721 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2722 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2723 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2724 unsigned int xattrsize = osb->s_xattr_inline_size; 2725 2726 if (!ocfs2_xattr_has_space_inline(inode, di)) { 2727 ret = -ENOSPC; 2728 goto out; 2729 } 2730 2731 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, 2732 OCFS2_JOURNAL_ACCESS_WRITE); 2733 if (ret) { 2734 mlog_errno(ret); 2735 goto out; 2736 } 2737 2738 /* 2739 * Adjust extent record count or inline data size 2740 * to reserve space for extended attribute. 2741 */ 2742 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2743 struct ocfs2_inline_data *idata = &di->id2.i_data; 2744 le16_add_cpu(&idata->id_count, -xattrsize); 2745 } else if (!(ocfs2_inode_is_fast_symlink(inode))) { 2746 struct ocfs2_extent_list *el = &di->id2.i_list; 2747 le16_add_cpu(&el->l_count, -(xattrsize / 2748 sizeof(struct ocfs2_extent_rec))); 2749 } 2750 di->i_xattr_inline_size = cpu_to_le16(xattrsize); 2751 2752 spin_lock(&oi->ip_lock); 2753 oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; 2754 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2755 spin_unlock(&oi->ip_lock); 2756 2757 ocfs2_journal_dirty(ctxt->handle, di_bh); 2758 2759 out: 2760 return ret; 2761 } 2762 2763 /* 2764 * ocfs2_xattr_ibody_set() 2765 * 2766 * Set, replace or remove an extended attribute into inode block. 
2767 * 2768 */ 2769 static int ocfs2_xattr_ibody_set(struct inode *inode, 2770 struct ocfs2_xattr_info *xi, 2771 struct ocfs2_xattr_search *xs, 2772 struct ocfs2_xattr_set_ctxt *ctxt) 2773 { 2774 int ret; 2775 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2776 struct ocfs2_xa_loc loc; 2777 2778 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2779 return -ENOSPC; 2780 2781 down_write(&oi->ip_alloc_sem); 2782 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2783 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2784 if (ret) { 2785 if (ret != -ENOSPC) 2786 mlog_errno(ret); 2787 goto out; 2788 } 2789 } 2790 2791 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2792 xs->not_found ? NULL : xs->here); 2793 ret = ocfs2_xa_set(&loc, xi, ctxt); 2794 if (ret) { 2795 if (ret != -ENOSPC) 2796 mlog_errno(ret); 2797 goto out; 2798 } 2799 xs->here = loc.xl_entry; 2800 2801 out: 2802 up_write(&oi->ip_alloc_sem); 2803 2804 return ret; 2805 } 2806 2807 /* 2808 * ocfs2_xattr_block_find() 2809 * 2810 * Find extended attribute in external block and 2811 * fill search info into struct ocfs2_xattr_search. 
2812 */ 2813 static int ocfs2_xattr_block_find(struct inode *inode, 2814 int name_index, 2815 const char *name, 2816 struct ocfs2_xattr_search *xs) 2817 { 2818 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2819 struct buffer_head *blk_bh = NULL; 2820 struct ocfs2_xattr_block *xb; 2821 int ret = 0; 2822 2823 if (!di->i_xattr_loc) 2824 return ret; 2825 2826 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 2827 &blk_bh); 2828 if (ret < 0) { 2829 mlog_errno(ret); 2830 return ret; 2831 } 2832 2833 xs->xattr_bh = blk_bh; 2834 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2835 2836 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2837 xs->header = &xb->xb_attrs.xb_header; 2838 xs->base = (void *)xs->header; 2839 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; 2840 xs->here = xs->header->xh_entries; 2841 2842 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2843 } else 2844 ret = ocfs2_xattr_index_block_find(inode, blk_bh, 2845 name_index, 2846 name, xs); 2847 2848 if (ret && ret != -ENODATA) { 2849 xs->xattr_bh = NULL; 2850 goto cleanup; 2851 } 2852 xs->not_found = ret; 2853 return 0; 2854 cleanup: 2855 brelse(blk_bh); 2856 2857 return ret; 2858 } 2859 2860 static int ocfs2_create_xattr_block(struct inode *inode, 2861 struct buffer_head *inode_bh, 2862 struct ocfs2_xattr_set_ctxt *ctxt, 2863 int indexed, 2864 struct buffer_head **ret_bh) 2865 { 2866 int ret; 2867 u16 suballoc_bit_start; 2868 u32 num_got; 2869 u64 suballoc_loc, first_blkno; 2870 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; 2871 struct buffer_head *new_bh = NULL; 2872 struct ocfs2_xattr_block *xblk; 2873 2874 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 2875 inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); 2876 if (ret < 0) { 2877 mlog_errno(ret); 2878 goto end; 2879 } 2880 2881 ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1, 2882 &suballoc_loc, &suballoc_bit_start, 2883 &num_got, &first_blkno); 2884 if 
(ret < 0) { 2885 mlog_errno(ret); 2886 goto end; 2887 } 2888 2889 new_bh = sb_getblk(inode->i_sb, first_blkno); 2890 if (!new_bh) { 2891 ret = -ENOMEM; 2892 mlog_errno(ret); 2893 goto end; 2894 } 2895 2896 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); 2897 2898 ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), 2899 new_bh, 2900 OCFS2_JOURNAL_ACCESS_CREATE); 2901 if (ret < 0) { 2902 mlog_errno(ret); 2903 goto end; 2904 } 2905 2906 /* Initialize ocfs2_xattr_block */ 2907 xblk = (struct ocfs2_xattr_block *)new_bh->b_data; 2908 memset(xblk, 0, inode->i_sb->s_blocksize); 2909 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2910 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2911 xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc); 2912 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2913 xblk->xb_fs_generation = 2914 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation); 2915 xblk->xb_blkno = cpu_to_le64(first_blkno); 2916 if (indexed) { 2917 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2918 xr->xt_clusters = cpu_to_le32(1); 2919 xr->xt_last_eb_blk = 0; 2920 xr->xt_list.l_tree_depth = 0; 2921 xr->xt_list.l_count = cpu_to_le16( 2922 ocfs2_xattr_recs_per_xb(inode->i_sb)); 2923 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 2924 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); 2925 } 2926 ocfs2_journal_dirty(ctxt->handle, new_bh); 2927 2928 /* Add it to the inode */ 2929 di->i_xattr_loc = cpu_to_le64(first_blkno); 2930 2931 spin_lock(&OCFS2_I(inode)->ip_lock); 2932 OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 2933 di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); 2934 spin_unlock(&OCFS2_I(inode)->ip_lock); 2935 2936 ocfs2_journal_dirty(ctxt->handle, inode_bh); 2937 2938 *ret_bh = new_bh; 2939 new_bh = NULL; 2940 2941 end: 2942 brelse(new_bh); 2943 return ret; 2944 } 2945 2946 /* 2947 * ocfs2_xattr_block_set() 2948 * 2949 * Set, replace or remove an extended attribute into 
external block.
 *
 */
static int ocfs2_xattr_block_set(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs,
				 struct ocfs2_xattr_set_ctxt *ctxt)
{
	struct ocfs2_xattr_block *xblk = NULL;
	struct buffer_head *created_bh = NULL;
	struct ocfs2_xa_loc loc;
	int status;

	if (xs->xattr_bh) {
		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
	} else {
		/* No external block yet: create a flat one and aim xs at it. */
		status = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
						  0, &created_bh);
		if (status) {
			mlog_errno(status);
			return status;
		}

		xs->xattr_bh = created_bh;
		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
		xs->header = &xblk->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
		xs->here = xs->header->xh_entries;
	}

	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
					      xs->not_found ? NULL : xs->here);

		status = ocfs2_xa_set(&loc, xi, ctxt);
		if (status) {
			/*
			 * Only a plain -ENOSPC, with no abort requested,
			 * is answered by creating an index block.
			 */
			if ((status != -ENOSPC) || ctxt->set_abort)
				return status;

			status = ocfs2_xattr_create_index_block(inode, xs,
								ctxt);
			if (status)
				return status;
		} else {
			xs->here = loc.xl_entry;
		}
	}

	/* Re-read the flag: the block may have just become indexed. */
	if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
		status = ocfs2_xattr_set_entry_index_block(inode, xi, xs,
							   ctxt);

	return status;
}

/* Check whether the new xattr can be inserted into the inode.
*/ 3003 static int ocfs2_xattr_can_be_in_inode(struct inode *inode, 3004 struct ocfs2_xattr_info *xi, 3005 struct ocfs2_xattr_search *xs) 3006 { 3007 struct ocfs2_xattr_entry *last; 3008 int free, i; 3009 size_t min_offs = xs->end - xs->base; 3010 3011 if (!xs->header) 3012 return 0; 3013 3014 last = xs->header->xh_entries; 3015 3016 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 3017 size_t offs = le16_to_cpu(last->xe_name_offset); 3018 if (offs < min_offs) 3019 min_offs = offs; 3020 last += 1; 3021 } 3022 3023 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; 3024 if (free < 0) 3025 return 0; 3026 3027 BUG_ON(!xs->not_found); 3028 3029 if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi))) 3030 return 1; 3031 3032 return 0; 3033 } 3034 3035 static int ocfs2_calc_xattr_set_need(struct inode *inode, 3036 struct ocfs2_dinode *di, 3037 struct ocfs2_xattr_info *xi, 3038 struct ocfs2_xattr_search *xis, 3039 struct ocfs2_xattr_search *xbs, 3040 int *clusters_need, 3041 int *meta_need, 3042 int *credits_need) 3043 { 3044 int ret = 0, old_in_xb = 0; 3045 int clusters_add = 0, meta_add = 0, credits = 0; 3046 struct buffer_head *bh = NULL; 3047 struct ocfs2_xattr_block *xb = NULL; 3048 struct ocfs2_xattr_entry *xe = NULL; 3049 struct ocfs2_xattr_value_root *xv = NULL; 3050 char *base = NULL; 3051 int name_offset, name_len = 0; 3052 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3053 xi->xi_value_len); 3054 u64 value_size; 3055 3056 /* 3057 * Calculate the clusters we need to write. 3058 * No matter whether we replace an old one or add a new one, 3059 * we need this for writing. 
3060 */ 3061 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 3062 credits += new_clusters * 3063 ocfs2_clusters_to_blocks(inode->i_sb, 1); 3064 3065 if (xis->not_found && xbs->not_found) { 3066 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3067 3068 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3069 clusters_add += new_clusters; 3070 credits += ocfs2_calc_extend_credits(inode->i_sb, 3071 &def_xv.xv.xr_list); 3072 } 3073 3074 goto meta_guess; 3075 } 3076 3077 if (!xis->not_found) { 3078 xe = xis->here; 3079 name_offset = le16_to_cpu(xe->xe_name_offset); 3080 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3081 base = xis->base; 3082 credits += OCFS2_INODE_UPDATE_CREDITS; 3083 } else { 3084 int i, block_off = 0; 3085 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3086 xe = xbs->here; 3087 name_offset = le16_to_cpu(xe->xe_name_offset); 3088 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3089 i = xbs->here - xbs->header->xh_entries; 3090 old_in_xb = 1; 3091 3092 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3093 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3094 bucket_xh(xbs->bucket), 3095 i, &block_off, 3096 &name_offset); 3097 base = bucket_block(xbs->bucket, block_off); 3098 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3099 } else { 3100 base = xbs->base; 3101 credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS; 3102 } 3103 } 3104 3105 /* 3106 * delete a xattr doesn't need metadata and cluster allocation. 3107 * so just calculate the credits and return. 3108 * 3109 * The credits for removing the value tree will be extended 3110 * by ocfs2_remove_extent itself. 3111 */ 3112 if (!xi->xi_value) { 3113 if (!ocfs2_xattr_is_local(xe)) 3114 credits += ocfs2_remove_extent_credits(inode->i_sb); 3115 3116 goto out; 3117 } 3118 3119 /* do cluster allocation guess first. 
*/ 3120 value_size = le64_to_cpu(xe->xe_value_size); 3121 3122 if (old_in_xb) { 3123 /* 3124 * In xattr set, we always try to set the xe in inode first, 3125 * so if it can be inserted into inode successfully, the old 3126 * one will be removed from the xattr block, and this xattr 3127 * will be inserted into inode as a new xattr in inode. 3128 */ 3129 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { 3130 clusters_add += new_clusters; 3131 credits += ocfs2_remove_extent_credits(inode->i_sb) + 3132 OCFS2_INODE_UPDATE_CREDITS; 3133 if (!ocfs2_xattr_is_local(xe)) 3134 credits += ocfs2_calc_extend_credits( 3135 inode->i_sb, 3136 &def_xv.xv.xr_list); 3137 goto out; 3138 } 3139 } 3140 3141 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3142 /* the new values will be stored outside. */ 3143 u32 old_clusters = 0; 3144 3145 if (!ocfs2_xattr_is_local(xe)) { 3146 old_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3147 value_size); 3148 xv = (struct ocfs2_xattr_value_root *) 3149 (base + name_offset + name_len); 3150 value_size = OCFS2_XATTR_ROOT_SIZE; 3151 } else 3152 xv = &def_xv.xv; 3153 3154 if (old_clusters >= new_clusters) { 3155 credits += ocfs2_remove_extent_credits(inode->i_sb); 3156 goto out; 3157 } else { 3158 meta_add += ocfs2_extend_meta_needed(&xv->xr_list); 3159 clusters_add += new_clusters - old_clusters; 3160 credits += ocfs2_calc_extend_credits(inode->i_sb, 3161 &xv->xr_list); 3162 if (value_size >= OCFS2_XATTR_ROOT_SIZE) 3163 goto out; 3164 } 3165 } else { 3166 /* 3167 * Now the new value will be stored inside. So if the new 3168 * value is smaller than the size of value root or the old 3169 * value, we don't need any allocation, otherwise we have 3170 * to guess metadata allocation. 3171 */ 3172 if ((ocfs2_xattr_is_local(xe) && 3173 (value_size >= xi->xi_value_len)) || 3174 (!ocfs2_xattr_is_local(xe) && 3175 OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len)) 3176 goto out; 3177 } 3178 3179 meta_guess: 3180 /* calculate metadata allocation. 
*/ 3181 if (di->i_xattr_loc) { 3182 if (!xbs->xattr_bh) { 3183 ret = ocfs2_read_xattr_block(inode, 3184 le64_to_cpu(di->i_xattr_loc), 3185 &bh); 3186 if (ret) { 3187 mlog_errno(ret); 3188 goto out; 3189 } 3190 3191 xb = (struct ocfs2_xattr_block *)bh->b_data; 3192 } else 3193 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3194 3195 /* 3196 * If there is already an xattr tree, good, we can calculate 3197 * like other b-trees. Otherwise we may have the chance of 3198 * create a tree, the credit calculation is borrowed from 3199 * ocfs2_calc_extend_credits with root_el = NULL. And the 3200 * new tree will be cluster based, so no meta is needed. 3201 */ 3202 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3203 struct ocfs2_extent_list *el = 3204 &xb->xb_attrs.xb_root.xt_list; 3205 meta_add += ocfs2_extend_meta_needed(el); 3206 credits += ocfs2_calc_extend_credits(inode->i_sb, 3207 el); 3208 } else 3209 credits += OCFS2_SUBALLOC_ALLOC + 1; 3210 3211 /* 3212 * This cluster will be used either for new bucket or for 3213 * new xattr block. 3214 * If the cluster size is the same as the bucket size, one 3215 * more is needed since we may need to extend the bucket 3216 * also. 
3217 */ 3218 clusters_add += 1; 3219 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3220 if (OCFS2_XATTR_BUCKET_SIZE == 3221 OCFS2_SB(inode->i_sb)->s_clustersize) { 3222 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3223 clusters_add += 1; 3224 } 3225 } else { 3226 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 3227 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3228 struct ocfs2_extent_list *el = &def_xv.xv.xr_list; 3229 meta_add += ocfs2_extend_meta_needed(el); 3230 credits += ocfs2_calc_extend_credits(inode->i_sb, 3231 el); 3232 } else { 3233 meta_add += 1; 3234 } 3235 } 3236 out: 3237 if (clusters_need) 3238 *clusters_need = clusters_add; 3239 if (meta_need) 3240 *meta_need = meta_add; 3241 if (credits_need) 3242 *credits_need = credits; 3243 brelse(bh); 3244 return ret; 3245 } 3246 3247 static int ocfs2_init_xattr_set_ctxt(struct inode *inode, 3248 struct ocfs2_dinode *di, 3249 struct ocfs2_xattr_info *xi, 3250 struct ocfs2_xattr_search *xis, 3251 struct ocfs2_xattr_search *xbs, 3252 struct ocfs2_xattr_set_ctxt *ctxt, 3253 int extra_meta, 3254 int *credits) 3255 { 3256 int clusters_add, meta_add, ret; 3257 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3258 3259 memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt)); 3260 3261 ocfs2_init_dealloc_ctxt(&ctxt->dealloc); 3262 3263 ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, 3264 &clusters_add, &meta_add, credits); 3265 if (ret) { 3266 mlog_errno(ret); 3267 return ret; 3268 } 3269 3270 meta_add += extra_meta; 3271 trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add, 3272 clusters_add, *credits); 3273 3274 if (meta_add) { 3275 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, 3276 &ctxt->meta_ac); 3277 if (ret) { 3278 mlog_errno(ret); 3279 goto out; 3280 } 3281 } 3282 3283 if (clusters_add) { 3284 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac); 3285 if (ret) 3286 mlog_errno(ret); 3287 } 3288 out: 3289 if (ret) { 3290 if (ctxt->meta_ac) { 3291 
ocfs2_free_alloc_context(ctxt->meta_ac); 3292 ctxt->meta_ac = NULL; 3293 } 3294 3295 /* 3296 * We cannot have an error and a non null ctxt->data_ac. 3297 */ 3298 } 3299 3300 return ret; 3301 } 3302 3303 static int __ocfs2_xattr_set_handle(struct inode *inode, 3304 struct ocfs2_dinode *di, 3305 struct ocfs2_xattr_info *xi, 3306 struct ocfs2_xattr_search *xis, 3307 struct ocfs2_xattr_search *xbs, 3308 struct ocfs2_xattr_set_ctxt *ctxt) 3309 { 3310 int ret = 0, credits, old_found; 3311 3312 if (!xi->xi_value) { 3313 /* Remove existing extended attribute */ 3314 if (!xis->not_found) 3315 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3316 else if (!xbs->not_found) 3317 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3318 } else { 3319 /* We always try to set extended attribute into inode first*/ 3320 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3321 if (!ret && !xbs->not_found) { 3322 /* 3323 * If succeed and that extended attribute existing in 3324 * external block, then we will remove it. 
3325 */ 3326 xi->xi_value = NULL; 3327 xi->xi_value_len = 0; 3328 3329 old_found = xis->not_found; 3330 xis->not_found = -ENODATA; 3331 ret = ocfs2_calc_xattr_set_need(inode, 3332 di, 3333 xi, 3334 xis, 3335 xbs, 3336 NULL, 3337 NULL, 3338 &credits); 3339 xis->not_found = old_found; 3340 if (ret) { 3341 mlog_errno(ret); 3342 goto out; 3343 } 3344 3345 ret = ocfs2_extend_trans(ctxt->handle, credits); 3346 if (ret) { 3347 mlog_errno(ret); 3348 goto out; 3349 } 3350 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3351 } else if ((ret == -ENOSPC) && !ctxt->set_abort) { 3352 if (di->i_xattr_loc && !xbs->xattr_bh) { 3353 ret = ocfs2_xattr_block_find(inode, 3354 xi->xi_name_index, 3355 xi->xi_name, xbs); 3356 if (ret) 3357 goto out; 3358 3359 old_found = xis->not_found; 3360 xis->not_found = -ENODATA; 3361 ret = ocfs2_calc_xattr_set_need(inode, 3362 di, 3363 xi, 3364 xis, 3365 xbs, 3366 NULL, 3367 NULL, 3368 &credits); 3369 xis->not_found = old_found; 3370 if (ret) { 3371 mlog_errno(ret); 3372 goto out; 3373 } 3374 3375 ret = ocfs2_extend_trans(ctxt->handle, credits); 3376 if (ret) { 3377 mlog_errno(ret); 3378 goto out; 3379 } 3380 } 3381 /* 3382 * If no space in inode, we will set extended attribute 3383 * into external block. 3384 */ 3385 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3386 if (ret) 3387 goto out; 3388 if (!xis->not_found) { 3389 /* 3390 * If succeed and that extended attribute 3391 * existing in inode, we will remove it. 3392 */ 3393 xi->xi_value = NULL; 3394 xi->xi_value_len = 0; 3395 xbs->not_found = -ENODATA; 3396 ret = ocfs2_calc_xattr_set_need(inode, 3397 di, 3398 xi, 3399 xis, 3400 xbs, 3401 NULL, 3402 NULL, 3403 &credits); 3404 if (ret) { 3405 mlog_errno(ret); 3406 goto out; 3407 } 3408 3409 ret = ocfs2_extend_trans(ctxt->handle, credits); 3410 if (ret) { 3411 mlog_errno(ret); 3412 goto out; 3413 } 3414 ret = ocfs2_xattr_ibody_set(inode, xi, 3415 xis, ctxt); 3416 } 3417 } 3418 } 3419 3420 if (!ret) { 3421 /* Update inode ctime. 
*/ 3422 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 3423 xis->inode_bh, 3424 OCFS2_JOURNAL_ACCESS_WRITE); 3425 if (ret) { 3426 mlog_errno(ret); 3427 goto out; 3428 } 3429 3430 inode->i_ctime = current_time(inode); 3431 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 3432 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 3433 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh); 3434 } 3435 out: 3436 return ret; 3437 } 3438 3439 /* 3440 * This function only called duing creating inode 3441 * for init security/acl xattrs of the new inode. 3442 * All transanction credits have been reserved in mknod. 3443 */ 3444 int ocfs2_xattr_set_handle(handle_t *handle, 3445 struct inode *inode, 3446 struct buffer_head *di_bh, 3447 int name_index, 3448 const char *name, 3449 const void *value, 3450 size_t value_len, 3451 int flags, 3452 struct ocfs2_alloc_context *meta_ac, 3453 struct ocfs2_alloc_context *data_ac) 3454 { 3455 struct ocfs2_dinode *di; 3456 int ret; 3457 3458 struct ocfs2_xattr_info xi = { 3459 .xi_name_index = name_index, 3460 .xi_name = name, 3461 .xi_name_len = strlen(name), 3462 .xi_value = value, 3463 .xi_value_len = value_len, 3464 }; 3465 3466 struct ocfs2_xattr_search xis = { 3467 .not_found = -ENODATA, 3468 }; 3469 3470 struct ocfs2_xattr_search xbs = { 3471 .not_found = -ENODATA, 3472 }; 3473 3474 struct ocfs2_xattr_set_ctxt ctxt = { 3475 .handle = handle, 3476 .meta_ac = meta_ac, 3477 .data_ac = data_ac, 3478 }; 3479 3480 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3481 return -EOPNOTSUPP; 3482 3483 /* 3484 * In extreme situation, may need xattr bucket when 3485 * block size is too small. And we have already reserved 3486 * the credits for bucket in mknod. 
3487 */ 3488 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) { 3489 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3490 if (!xbs.bucket) { 3491 mlog_errno(-ENOMEM); 3492 return -ENOMEM; 3493 } 3494 } 3495 3496 xis.inode_bh = xbs.inode_bh = di_bh; 3497 di = (struct ocfs2_dinode *)di_bh->b_data; 3498 3499 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3500 3501 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3502 if (ret) 3503 goto cleanup; 3504 if (xis.not_found) { 3505 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3506 if (ret) 3507 goto cleanup; 3508 } 3509 3510 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3511 3512 cleanup: 3513 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3514 brelse(xbs.xattr_bh); 3515 ocfs2_xattr_bucket_free(xbs.bucket); 3516 3517 return ret; 3518 } 3519 3520 /* 3521 * ocfs2_xattr_set() 3522 * 3523 * Set, replace or remove an extended attribute for this inode. 3524 * value is NULL to remove an existing extended attribute, else either 3525 * create or replace an extended attribute. 
3526 */ 3527 int ocfs2_xattr_set(struct inode *inode, 3528 int name_index, 3529 const char *name, 3530 const void *value, 3531 size_t value_len, 3532 int flags) 3533 { 3534 struct buffer_head *di_bh = NULL; 3535 struct ocfs2_dinode *di; 3536 int ret, credits, had_lock, ref_meta = 0, ref_credits = 0; 3537 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3538 struct inode *tl_inode = osb->osb_tl_inode; 3539 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; 3540 struct ocfs2_refcount_tree *ref_tree = NULL; 3541 struct ocfs2_lock_holder oh; 3542 3543 struct ocfs2_xattr_info xi = { 3544 .xi_name_index = name_index, 3545 .xi_name = name, 3546 .xi_name_len = strlen(name), 3547 .xi_value = value, 3548 .xi_value_len = value_len, 3549 }; 3550 3551 struct ocfs2_xattr_search xis = { 3552 .not_found = -ENODATA, 3553 }; 3554 3555 struct ocfs2_xattr_search xbs = { 3556 .not_found = -ENODATA, 3557 }; 3558 3559 if (!ocfs2_supports_xattr(osb)) 3560 return -EOPNOTSUPP; 3561 3562 /* 3563 * Only xbs will be used on indexed trees. xis doesn't need a 3564 * bucket. 3565 */ 3566 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3567 if (!xbs.bucket) { 3568 mlog_errno(-ENOMEM); 3569 return -ENOMEM; 3570 } 3571 3572 had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh); 3573 if (had_lock < 0) { 3574 ret = had_lock; 3575 mlog_errno(ret); 3576 goto cleanup_nolock; 3577 } 3578 xis.inode_bh = xbs.inode_bh = di_bh; 3579 di = (struct ocfs2_dinode *)di_bh->b_data; 3580 3581 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3582 /* 3583 * Scan inode and external block to find the same name 3584 * extended attribute and collect search information. 
3585 */ 3586 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3587 if (ret) 3588 goto cleanup; 3589 if (xis.not_found) { 3590 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3591 if (ret) 3592 goto cleanup; 3593 } 3594 3595 if (xis.not_found && xbs.not_found) { 3596 ret = -ENODATA; 3597 if (flags & XATTR_REPLACE) 3598 goto cleanup; 3599 ret = 0; 3600 if (!value) 3601 goto cleanup; 3602 } else { 3603 ret = -EEXIST; 3604 if (flags & XATTR_CREATE) 3605 goto cleanup; 3606 } 3607 3608 /* Check whether the value is refcounted and do some preparation. */ 3609 if (ocfs2_is_refcount_inode(inode) && 3610 (!xis.not_found || !xbs.not_found)) { 3611 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3612 &xis, &xbs, &ref_tree, 3613 &ref_meta, &ref_credits); 3614 if (ret) { 3615 mlog_errno(ret); 3616 goto cleanup; 3617 } 3618 } 3619 3620 inode_lock(tl_inode); 3621 3622 if (ocfs2_truncate_log_needs_flush(osb)) { 3623 ret = __ocfs2_flush_truncate_log(osb); 3624 if (ret < 0) { 3625 inode_unlock(tl_inode); 3626 mlog_errno(ret); 3627 goto cleanup; 3628 } 3629 } 3630 inode_unlock(tl_inode); 3631 3632 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3633 &xbs, &ctxt, ref_meta, &credits); 3634 if (ret) { 3635 mlog_errno(ret); 3636 goto cleanup; 3637 } 3638 3639 /* we need to update inode's ctime field, so add credit for it. 
*/ 3640 credits += OCFS2_INODE_UPDATE_CREDITS; 3641 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 3642 if (IS_ERR(ctxt.handle)) { 3643 ret = PTR_ERR(ctxt.handle); 3644 mlog_errno(ret); 3645 goto out_free_ac; 3646 } 3647 3648 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3649 ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0); 3650 3651 ocfs2_commit_trans(osb, ctxt.handle); 3652 3653 out_free_ac: 3654 if (ctxt.data_ac) 3655 ocfs2_free_alloc_context(ctxt.data_ac); 3656 if (ctxt.meta_ac) 3657 ocfs2_free_alloc_context(ctxt.meta_ac); 3658 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3659 ocfs2_schedule_truncate_log_flush(osb, 1); 3660 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3661 3662 cleanup: 3663 if (ref_tree) 3664 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3665 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3666 if (!value && !ret) { 3667 ret = ocfs2_try_remove_refcount_tree(inode, di_bh); 3668 if (ret) 3669 mlog_errno(ret); 3670 } 3671 ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); 3672 cleanup_nolock: 3673 brelse(di_bh); 3674 brelse(xbs.xattr_bh); 3675 ocfs2_xattr_bucket_free(xbs.bucket); 3676 3677 return ret; 3678 } 3679 3680 /* 3681 * Find the xattr extent rec which may contains name_hash. 3682 * e_cpos will be the first name hash of the xattr rec. 3683 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. 
3684 */ 3685 static int ocfs2_xattr_get_rec(struct inode *inode, 3686 u32 name_hash, 3687 u64 *p_blkno, 3688 u32 *e_cpos, 3689 u32 *num_clusters, 3690 struct ocfs2_extent_list *el) 3691 { 3692 int ret = 0, i; 3693 struct buffer_head *eb_bh = NULL; 3694 struct ocfs2_extent_block *eb; 3695 struct ocfs2_extent_rec *rec = NULL; 3696 u64 e_blkno = 0; 3697 3698 if (el->l_tree_depth) { 3699 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3700 &eb_bh); 3701 if (ret) { 3702 mlog_errno(ret); 3703 goto out; 3704 } 3705 3706 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3707 el = &eb->h_list; 3708 3709 if (el->l_tree_depth) { 3710 ret = ocfs2_error(inode->i_sb, 3711 "Inode %lu has non zero tree depth in xattr tree block %llu\n", 3712 inode->i_ino, 3713 (unsigned long long)eb_bh->b_blocknr); 3714 goto out; 3715 } 3716 } 3717 3718 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3719 rec = &el->l_recs[i]; 3720 3721 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3722 e_blkno = le64_to_cpu(rec->e_blkno); 3723 break; 3724 } 3725 } 3726 3727 if (!e_blkno) { 3728 ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n", 3729 inode->i_ino, 3730 le32_to_cpu(rec->e_cpos), 3731 ocfs2_rec_clusters(el, rec)); 3732 goto out; 3733 } 3734 3735 *p_blkno = le64_to_cpu(rec->e_blkno); 3736 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3737 if (e_cpos) 3738 *e_cpos = le32_to_cpu(rec->e_cpos); 3739 out: 3740 brelse(eb_bh); 3741 return ret; 3742 } 3743 3744 typedef int (xattr_bucket_func)(struct inode *inode, 3745 struct ocfs2_xattr_bucket *bucket, 3746 void *para); 3747 3748 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3749 struct ocfs2_xattr_bucket *bucket, 3750 int name_index, 3751 const char *name, 3752 u32 name_hash, 3753 u16 *xe_index, 3754 int *found) 3755 { 3756 int i, ret = 0, cmp = 1, block_off, new_offset; 3757 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3758 size_t name_len = strlen(name); 3759 struct 
ocfs2_xattr_entry *xe = NULL; 3760 char *xe_name; 3761 3762 /* 3763 * We don't use binary search in the bucket because there 3764 * may be multiple entries with the same name hash. 3765 */ 3766 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3767 xe = &xh->xh_entries[i]; 3768 3769 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3770 continue; 3771 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3772 break; 3773 3774 cmp = name_index - ocfs2_xattr_get_type(xe); 3775 if (!cmp) 3776 cmp = name_len - xe->xe_name_len; 3777 if (cmp) 3778 continue; 3779 3780 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3781 xh, 3782 i, 3783 &block_off, 3784 &new_offset); 3785 if (ret) { 3786 mlog_errno(ret); 3787 break; 3788 } 3789 3790 3791 xe_name = bucket_block(bucket, block_off) + new_offset; 3792 if (!memcmp(name, xe_name, name_len)) { 3793 *xe_index = i; 3794 *found = 1; 3795 ret = 0; 3796 break; 3797 } 3798 } 3799 3800 return ret; 3801 } 3802 3803 /* 3804 * Find the specified xattr entry in a series of buckets. 3805 * This series start from p_blkno and last for num_clusters. 3806 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3807 * the num of the valid buckets. 3808 * 3809 * Return the buffer_head this xattr should reside in. And if the xattr's 3810 * hash is in the gap of 2 buckets, return the lower bucket. 
3811 */ 3812 static int ocfs2_xattr_bucket_find(struct inode *inode, 3813 int name_index, 3814 const char *name, 3815 u32 name_hash, 3816 u64 p_blkno, 3817 u32 first_hash, 3818 u32 num_clusters, 3819 struct ocfs2_xattr_search *xs) 3820 { 3821 int ret, found = 0; 3822 struct ocfs2_xattr_header *xh = NULL; 3823 struct ocfs2_xattr_entry *xe = NULL; 3824 u16 index = 0; 3825 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3826 int low_bucket = 0, bucket, high_bucket; 3827 struct ocfs2_xattr_bucket *search; 3828 u64 blkno, lower_blkno = 0; 3829 3830 search = ocfs2_xattr_bucket_new(inode); 3831 if (!search) { 3832 ret = -ENOMEM; 3833 mlog_errno(ret); 3834 goto out; 3835 } 3836 3837 ret = ocfs2_read_xattr_bucket(search, p_blkno); 3838 if (ret) { 3839 mlog_errno(ret); 3840 goto out; 3841 } 3842 3843 xh = bucket_xh(search); 3844 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 3845 while (low_bucket <= high_bucket) { 3846 ocfs2_xattr_bucket_relse(search); 3847 3848 bucket = (low_bucket + high_bucket) / 2; 3849 blkno = p_blkno + bucket * blk_per_bucket; 3850 ret = ocfs2_read_xattr_bucket(search, blkno); 3851 if (ret) { 3852 mlog_errno(ret); 3853 goto out; 3854 } 3855 3856 xh = bucket_xh(search); 3857 xe = &xh->xh_entries[0]; 3858 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3859 high_bucket = bucket - 1; 3860 continue; 3861 } 3862 3863 /* 3864 * Check whether the hash of the last entry in our 3865 * bucket is larger than the search one. for an empty 3866 * bucket, the last one is also the first one. 3867 */ 3868 if (xh->xh_count) 3869 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; 3870 3871 /* record lower_blkno which may be the insert place. */ 3872 lower_blkno = blkno; 3873 3874 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3875 low_bucket = bucket + 1; 3876 continue; 3877 } 3878 3879 /* the searched xattr should reside in this bucket if exists. 
*/ 3880 ret = ocfs2_find_xe_in_bucket(inode, search, 3881 name_index, name, name_hash, 3882 &index, &found); 3883 if (ret) { 3884 mlog_errno(ret); 3885 goto out; 3886 } 3887 break; 3888 } 3889 3890 /* 3891 * Record the bucket we have found. 3892 * When the xattr's hash value is in the gap of 2 buckets, we will 3893 * always set it to the previous bucket. 3894 */ 3895 if (!lower_blkno) 3896 lower_blkno = p_blkno; 3897 3898 /* This should be in cache - we just read it during the search */ 3899 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); 3900 if (ret) { 3901 mlog_errno(ret); 3902 goto out; 3903 } 3904 3905 xs->header = bucket_xh(xs->bucket); 3906 xs->base = bucket_block(xs->bucket, 0); 3907 xs->end = xs->base + inode->i_sb->s_blocksize; 3908 3909 if (found) { 3910 xs->here = &xs->header->xh_entries[index]; 3911 trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno, 3912 name, name_index, name_hash, 3913 (unsigned long long)bucket_blkno(xs->bucket), 3914 index); 3915 } else 3916 ret = -ENODATA; 3917 3918 out: 3919 ocfs2_xattr_bucket_free(search); 3920 return ret; 3921 } 3922 3923 static int ocfs2_xattr_index_block_find(struct inode *inode, 3924 struct buffer_head *root_bh, 3925 int name_index, 3926 const char *name, 3927 struct ocfs2_xattr_search *xs) 3928 { 3929 int ret; 3930 struct ocfs2_xattr_block *xb = 3931 (struct ocfs2_xattr_block *)root_bh->b_data; 3932 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 3933 struct ocfs2_extent_list *el = &xb_root->xt_list; 3934 u64 p_blkno = 0; 3935 u32 first_hash, num_clusters = 0; 3936 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 3937 3938 if (le16_to_cpu(el->l_next_free_rec) == 0) 3939 return -ENODATA; 3940 3941 trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno, 3942 name, name_index, name_hash, 3943 (unsigned long long)root_bh->b_blocknr, 3944 -1); 3945 3946 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3947 &num_clusters, el); 3948 if (ret) { 
3949 mlog_errno(ret); 3950 goto out; 3951 } 3952 3953 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 3954 3955 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno, 3956 name, name_index, first_hash, 3957 (unsigned long long)p_blkno, 3958 num_clusters); 3959 3960 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3961 p_blkno, first_hash, num_clusters, xs); 3962 3963 out: 3964 return ret; 3965 } 3966 3967 static int ocfs2_iterate_xattr_buckets(struct inode *inode, 3968 u64 blkno, 3969 u32 clusters, 3970 xattr_bucket_func *func, 3971 void *para) 3972 { 3973 int i, ret = 0; 3974 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3975 u32 num_buckets = clusters * bpc; 3976 struct ocfs2_xattr_bucket *bucket; 3977 3978 bucket = ocfs2_xattr_bucket_new(inode); 3979 if (!bucket) { 3980 mlog_errno(-ENOMEM); 3981 return -ENOMEM; 3982 } 3983 3984 trace_ocfs2_iterate_xattr_buckets( 3985 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3986 (unsigned long long)blkno, clusters); 3987 3988 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 3989 ret = ocfs2_read_xattr_bucket(bucket, blkno); 3990 if (ret) { 3991 mlog_errno(ret); 3992 break; 3993 } 3994 3995 /* 3996 * The real bucket num in this series of blocks is stored 3997 * in the 1st bucket. 
3998 */ 3999 if (i == 0) 4000 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 4001 4002 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno, 4003 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 4004 if (func) { 4005 ret = func(inode, bucket, para); 4006 if (ret && ret != -ERANGE) 4007 mlog_errno(ret); 4008 /* Fall through to bucket_relse() */ 4009 } 4010 4011 ocfs2_xattr_bucket_relse(bucket); 4012 if (ret) 4013 break; 4014 } 4015 4016 ocfs2_xattr_bucket_free(bucket); 4017 return ret; 4018 } 4019 4020 struct ocfs2_xattr_tree_list { 4021 char *buffer; 4022 size_t buffer_size; 4023 size_t result; 4024 }; 4025 4026 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 4027 struct ocfs2_xattr_header *xh, 4028 int index, 4029 int *block_off, 4030 int *new_offset) 4031 { 4032 u16 name_offset; 4033 4034 if (index < 0 || index >= le16_to_cpu(xh->xh_count)) 4035 return -EINVAL; 4036 4037 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 4038 4039 *block_off = name_offset >> sb->s_blocksize_bits; 4040 *new_offset = name_offset % sb->s_blocksize; 4041 4042 return 0; 4043 } 4044 4045 static int ocfs2_list_xattr_bucket(struct inode *inode, 4046 struct ocfs2_xattr_bucket *bucket, 4047 void *para) 4048 { 4049 int ret = 0, type; 4050 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; 4051 int i, block_off, new_offset; 4052 const char *name; 4053 4054 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { 4055 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; 4056 type = ocfs2_xattr_get_type(entry); 4057 4058 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 4059 bucket_xh(bucket), 4060 i, 4061 &block_off, 4062 &new_offset); 4063 if (ret) 4064 break; 4065 4066 name = (const char *)bucket_block(bucket, block_off) + 4067 new_offset; 4068 ret = ocfs2_xattr_list_entry(inode->i_sb, 4069 xl->buffer, 4070 xl->buffer_size, 4071 &xl->result, 4072 type, name, 4073 
entry->xe_name_len); 4074 if (ret) 4075 break; 4076 } 4077 4078 return ret; 4079 } 4080 4081 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 4082 struct buffer_head *blk_bh, 4083 xattr_tree_rec_func *rec_func, 4084 void *para) 4085 { 4086 struct ocfs2_xattr_block *xb = 4087 (struct ocfs2_xattr_block *)blk_bh->b_data; 4088 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; 4089 int ret = 0; 4090 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 4091 u64 p_blkno = 0; 4092 4093 if (!el->l_next_free_rec || !rec_func) 4094 return 0; 4095 4096 while (name_hash > 0) { 4097 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 4098 &e_cpos, &num_clusters, el); 4099 if (ret) { 4100 mlog_errno(ret); 4101 break; 4102 } 4103 4104 ret = rec_func(inode, blk_bh, p_blkno, e_cpos, 4105 num_clusters, para); 4106 if (ret) { 4107 if (ret != -ERANGE) 4108 mlog_errno(ret); 4109 break; 4110 } 4111 4112 if (e_cpos == 0) 4113 break; 4114 4115 name_hash = e_cpos - 1; 4116 } 4117 4118 return ret; 4119 4120 } 4121 4122 static int ocfs2_list_xattr_tree_rec(struct inode *inode, 4123 struct buffer_head *root_bh, 4124 u64 blkno, u32 cpos, u32 len, void *para) 4125 { 4126 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 4127 ocfs2_list_xattr_bucket, para); 4128 } 4129 4130 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 4131 struct buffer_head *blk_bh, 4132 char *buffer, 4133 size_t buffer_size) 4134 { 4135 int ret; 4136 struct ocfs2_xattr_tree_list xl = { 4137 .buffer = buffer, 4138 .buffer_size = buffer_size, 4139 .result = 0, 4140 }; 4141 4142 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 4143 ocfs2_list_xattr_tree_rec, &xl); 4144 if (ret) { 4145 mlog_errno(ret); 4146 goto out; 4147 } 4148 4149 ret = xl.result; 4150 out: 4151 return ret; 4152 } 4153 4154 static int cmp_xe(const void *a, const void *b) 4155 { 4156 const struct ocfs2_xattr_entry *l = a, *r = b; 4157 u32 l_hash = le32_to_cpu(l->xe_name_hash); 4158 u32 r_hash = 
le32_to_cpu(r->xe_name_hash); 4159 4160 if (l_hash > r_hash) 4161 return 1; 4162 if (l_hash < r_hash) 4163 return -1; 4164 return 0; 4165 } 4166 4167 static void swap_xe(void *a, void *b, int size) 4168 { 4169 struct ocfs2_xattr_entry *l = a, *r = b, tmp; 4170 4171 tmp = *l; 4172 memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); 4173 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); 4174 } 4175 4176 /* 4177 * When the ocfs2_xattr_block is filled up, new bucket will be created 4178 * and all the xattr entries will be moved to the new bucket. 4179 * The header goes at the start of the bucket, and the names+values are 4180 * filled from the end. This is why *target starts as the last buffer. 4181 * Note: we need to sort the entries since they are not saved in order 4182 * in the ocfs2_xattr_block. 4183 */ 4184 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 4185 struct buffer_head *xb_bh, 4186 struct ocfs2_xattr_bucket *bucket) 4187 { 4188 int i, blocksize = inode->i_sb->s_blocksize; 4189 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4190 u16 offset, size, off_change; 4191 struct ocfs2_xattr_entry *xe; 4192 struct ocfs2_xattr_block *xb = 4193 (struct ocfs2_xattr_block *)xb_bh->b_data; 4194 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 4195 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 4196 u16 count = le16_to_cpu(xb_xh->xh_count); 4197 char *src = xb_bh->b_data; 4198 char *target = bucket_block(bucket, blks - 1); 4199 4200 trace_ocfs2_cp_xattr_block_to_bucket_begin( 4201 (unsigned long long)xb_bh->b_blocknr, 4202 (unsigned long long)bucket_blkno(bucket)); 4203 4204 for (i = 0; i < blks; i++) 4205 memset(bucket_block(bucket, i), 0, blocksize); 4206 4207 /* 4208 * Since the xe_name_offset is based on ocfs2_xattr_header, 4209 * there is a offset change corresponding to the change of 4210 * ocfs2_xattr_header's position. 
4211 */ 4212 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4213 xe = &xb_xh->xh_entries[count - 1]; 4214 offset = le16_to_cpu(xe->xe_name_offset) + off_change; 4215 size = blocksize - offset; 4216 4217 /* copy all the names and values. */ 4218 memcpy(target + offset, src + offset, size); 4219 4220 /* Init new header now. */ 4221 xh->xh_count = xb_xh->xh_count; 4222 xh->xh_num_buckets = cpu_to_le16(1); 4223 xh->xh_name_value_len = cpu_to_le16(size); 4224 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 4225 4226 /* copy all the entries. */ 4227 target = bucket_block(bucket, 0); 4228 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 4229 size = count * sizeof(struct ocfs2_xattr_entry); 4230 memcpy(target + offset, (char *)xb_xh + offset, size); 4231 4232 /* Change the xe offset for all the xe because of the move. */ 4233 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + 4234 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4235 for (i = 0; i < count; i++) 4236 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4237 4238 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); 4239 4240 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4241 cmp_xe, swap_xe); 4242 } 4243 4244 /* 4245 * After we move xattr from block to index btree, we have to 4246 * update ocfs2_xattr_search to the new xe and base. 4247 * 4248 * When the entry is in xattr block, xattr_bh indicates the storage place. 4249 * While if the entry is in index b-tree, "bucket" indicates the 4250 * real place of the xattr. 
4251 */ 4252 static void ocfs2_xattr_update_xattr_search(struct inode *inode, 4253 struct ocfs2_xattr_search *xs, 4254 struct buffer_head *old_bh) 4255 { 4256 char *buf = old_bh->b_data; 4257 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 4258 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 4259 int i; 4260 4261 xs->header = bucket_xh(xs->bucket); 4262 xs->base = bucket_block(xs->bucket, 0); 4263 xs->end = xs->base + inode->i_sb->s_blocksize; 4264 4265 if (xs->not_found) 4266 return; 4267 4268 i = xs->here - old_xh->xh_entries; 4269 xs->here = &xs->header->xh_entries[i]; 4270 } 4271 4272 static int ocfs2_xattr_create_index_block(struct inode *inode, 4273 struct ocfs2_xattr_search *xs, 4274 struct ocfs2_xattr_set_ctxt *ctxt) 4275 { 4276 int ret; 4277 u32 bit_off, len; 4278 u64 blkno; 4279 handle_t *handle = ctxt->handle; 4280 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4281 struct buffer_head *xb_bh = xs->xattr_bh; 4282 struct ocfs2_xattr_block *xb = 4283 (struct ocfs2_xattr_block *)xb_bh->b_data; 4284 struct ocfs2_xattr_tree_root *xr; 4285 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4286 4287 trace_ocfs2_xattr_create_index_block_begin( 4288 (unsigned long long)xb_bh->b_blocknr); 4289 4290 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4291 BUG_ON(!xs->bucket); 4292 4293 /* 4294 * XXX: 4295 * We can use this lock for now, and maybe move to a dedicated mutex 4296 * if performance becomes a problem later. 
4297 */ 4298 down_write(&oi->ip_alloc_sem); 4299 4300 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, 4301 OCFS2_JOURNAL_ACCESS_WRITE); 4302 if (ret) { 4303 mlog_errno(ret); 4304 goto out; 4305 } 4306 4307 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 4308 1, 1, &bit_off, &len); 4309 if (ret) { 4310 mlog_errno(ret); 4311 goto out; 4312 } 4313 4314 /* 4315 * The bucket may spread in many blocks, and 4316 * we will only touch the 1st block and the last block 4317 * in the whole bucket(one for entry and one for data). 4318 */ 4319 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4320 4321 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); 4322 4323 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1); 4324 if (ret) { 4325 mlog_errno(ret); 4326 goto out; 4327 } 4328 4329 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 4330 OCFS2_JOURNAL_ACCESS_CREATE); 4331 if (ret) { 4332 mlog_errno(ret); 4333 goto out; 4334 } 4335 4336 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); 4337 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 4338 4339 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); 4340 4341 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 4342 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 4343 offsetof(struct ocfs2_xattr_block, xb_attrs)); 4344 4345 xr = &xb->xb_attrs.xb_root; 4346 xr->xt_clusters = cpu_to_le32(1); 4347 xr->xt_last_eb_blk = 0; 4348 xr->xt_list.l_tree_depth = 0; 4349 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); 4350 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 4351 4352 xr->xt_list.l_recs[0].e_cpos = 0; 4353 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 4354 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 4355 4356 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 4357 4358 ocfs2_journal_dirty(handle, xb_bh); 4359 4360 out: 4361 up_write(&oi->ip_alloc_sem); 4362 4363 return ret; 4364 } 4365 4366 static int 
cmp_xe_offset(const void *a, const void *b) 4367 { 4368 const struct ocfs2_xattr_entry *l = a, *r = b; 4369 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4370 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4371 4372 if (l_name_offset < r_name_offset) 4373 return 1; 4374 if (l_name_offset > r_name_offset) 4375 return -1; 4376 return 0; 4377 } 4378 4379 /* 4380 * defrag a xattr bucket if we find that the bucket has some 4381 * holes beteen name/value pairs. 4382 * We will move all the name/value pairs to the end of the bucket 4383 * so that we can spare some space for insertion. 4384 */ 4385 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4386 handle_t *handle, 4387 struct ocfs2_xattr_bucket *bucket) 4388 { 4389 int ret, i; 4390 size_t end, offset, len; 4391 struct ocfs2_xattr_header *xh; 4392 char *entries, *buf, *bucket_buf = NULL; 4393 u64 blkno = bucket_blkno(bucket); 4394 u16 xh_free_start; 4395 size_t blocksize = inode->i_sb->s_blocksize; 4396 struct ocfs2_xattr_entry *xe; 4397 4398 /* 4399 * In order to make the operation more efficient and generic, 4400 * we copy all the blocks into a contiguous memory and do the 4401 * defragment there, so if anything is error, we will not touch 4402 * the real block. 
4403 */ 4404 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4405 if (!bucket_buf) { 4406 ret = -EIO; 4407 goto out; 4408 } 4409 4410 buf = bucket_buf; 4411 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4412 memcpy(buf, bucket_block(bucket, i), blocksize); 4413 4414 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4415 OCFS2_JOURNAL_ACCESS_WRITE); 4416 if (ret < 0) { 4417 mlog_errno(ret); 4418 goto out; 4419 } 4420 4421 xh = (struct ocfs2_xattr_header *)bucket_buf; 4422 entries = (char *)xh->xh_entries; 4423 xh_free_start = le16_to_cpu(xh->xh_free_start); 4424 4425 trace_ocfs2_defrag_xattr_bucket( 4426 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4427 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4428 4429 /* 4430 * sort all the entries by their offset. 4431 * the largest will be the first, so that we can 4432 * move them to the end one by one. 4433 */ 4434 sort(entries, le16_to_cpu(xh->xh_count), 4435 sizeof(struct ocfs2_xattr_entry), 4436 cmp_xe_offset, swap_xe); 4437 4438 /* Move all name/values to the end of the bucket. */ 4439 xe = xh->xh_entries; 4440 end = OCFS2_XATTR_BUCKET_SIZE; 4441 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4442 offset = le16_to_cpu(xe->xe_name_offset); 4443 len = namevalue_size_xe(xe); 4444 4445 /* 4446 * We must make sure that the name/value pair 4447 * exist in the same block. So adjust end to 4448 * the previous block end if needed. 
4449 */ 4450 if (((end - len) / blocksize != 4451 (end - 1) / blocksize)) 4452 end = end - end % blocksize; 4453 4454 if (end > offset + len) { 4455 memmove(bucket_buf + end - len, 4456 bucket_buf + offset, len); 4457 xe->xe_name_offset = cpu_to_le16(end - len); 4458 } 4459 4460 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4461 "bucket %llu\n", (unsigned long long)blkno); 4462 4463 end -= len; 4464 } 4465 4466 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4467 "bucket %llu\n", (unsigned long long)blkno); 4468 4469 if (xh_free_start == end) 4470 goto out; 4471 4472 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4473 xh->xh_free_start = cpu_to_le16(end); 4474 4475 /* sort the entries by their name_hash. */ 4476 sort(entries, le16_to_cpu(xh->xh_count), 4477 sizeof(struct ocfs2_xattr_entry), 4478 cmp_xe, swap_xe); 4479 4480 buf = bucket_buf; 4481 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4482 memcpy(bucket_block(bucket, i), buf, blocksize); 4483 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4484 4485 out: 4486 kfree(bucket_buf); 4487 return ret; 4488 } 4489 4490 /* 4491 * prev_blkno points to the start of an existing extent. new_blkno 4492 * points to a newly allocated extent. Because we know each of our 4493 * clusters contains more than bucket, we can easily split one cluster 4494 * at a bucket boundary. So we take the last cluster of the existing 4495 * extent and split it down the middle. We move the last half of the 4496 * buckets in the last cluster of the existing extent over to the new 4497 * extent. 4498 * 4499 * first_bh is the buffer at prev_blkno so we can update the existing 4500 * extent's bucket count. header_bh is the bucket were we were hoping 4501 * to insert our xattr. If the bucket move places the target in the new 4502 * extent, we'll update first_bh and header_bh after modifying the old 4503 * extent. 
4504 * 4505 * first_hash will be set as the 1st xe's name_hash in the new extent. 4506 */ 4507 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 4508 handle_t *handle, 4509 struct ocfs2_xattr_bucket *first, 4510 struct ocfs2_xattr_bucket *target, 4511 u64 new_blkno, 4512 u32 num_clusters, 4513 u32 *first_hash) 4514 { 4515 int ret; 4516 struct super_block *sb = inode->i_sb; 4517 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); 4518 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); 4519 int to_move = num_buckets / 2; 4520 u64 src_blkno; 4521 u64 last_cluster_blkno = bucket_blkno(first) + 4522 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); 4523 4524 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4525 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4526 4527 trace_ocfs2_mv_xattr_bucket_cross_cluster( 4528 (unsigned long long)last_cluster_blkno, 4529 (unsigned long long)new_blkno); 4530 4531 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4532 last_cluster_blkno, new_blkno, 4533 to_move, first_hash); 4534 if (ret) { 4535 mlog_errno(ret); 4536 goto out; 4537 } 4538 4539 /* This is the first bucket that got moved */ 4540 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); 4541 4542 /* 4543 * If the target bucket was part of the moved buckets, we need to 4544 * update first and target. 4545 */ 4546 if (bucket_blkno(target) >= src_blkno) { 4547 /* Find the block for the new target bucket */ 4548 src_blkno = new_blkno + 4549 (bucket_blkno(target) - src_blkno); 4550 4551 ocfs2_xattr_bucket_relse(first); 4552 ocfs2_xattr_bucket_relse(target); 4553 4554 /* 4555 * These shouldn't fail - the buffers are in the 4556 * journal from ocfs2_cp_xattr_bucket(). 
4557 */ 4558 ret = ocfs2_read_xattr_bucket(first, new_blkno); 4559 if (ret) { 4560 mlog_errno(ret); 4561 goto out; 4562 } 4563 ret = ocfs2_read_xattr_bucket(target, src_blkno); 4564 if (ret) 4565 mlog_errno(ret); 4566 4567 } 4568 4569 out: 4570 return ret; 4571 } 4572 4573 /* 4574 * Find the suitable pos when we divide a bucket into 2. 4575 * We have to make sure the xattrs with the same hash value exist 4576 * in the same bucket. 4577 * 4578 * If this ocfs2_xattr_header covers more than one hash value, find a 4579 * place where the hash value changes. Try to find the most even split. 4580 * The most common case is that all entries have different hash values, 4581 * and the first check we make will find a place to split. 4582 */ 4583 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) 4584 { 4585 struct ocfs2_xattr_entry *entries = xh->xh_entries; 4586 int count = le16_to_cpu(xh->xh_count); 4587 int delta, middle = count / 2; 4588 4589 /* 4590 * We start at the middle. Each step gets farther away in both 4591 * directions. We therefore hit the change in hash value 4592 * nearest to the middle. Note that this loop does not execute for 4593 * count < 2. 4594 */ 4595 for (delta = 0; delta < middle; delta++) { 4596 /* Let's check delta earlier than middle */ 4597 if (cmp_xe(&entries[middle - delta - 1], 4598 &entries[middle - delta])) 4599 return middle - delta; 4600 4601 /* For even counts, don't walk off the end */ 4602 if ((middle + delta + 1) == count) 4603 continue; 4604 4605 /* Now try delta past middle */ 4606 if (cmp_xe(&entries[middle + delta], 4607 &entries[middle + delta + 1])) 4608 return middle + delta + 1; 4609 } 4610 4611 /* Every entry had the same hash */ 4612 return count; 4613 } 4614 4615 /* 4616 * Move some xattrs in old bucket(blk) to new bucket(new_blk). 4617 * first_hash will record the 1st hash of the new bucket. 4618 * 4619 * Normally half of the xattrs will be moved. 
But we have to make 4620 * sure that the xattrs with the same hash value are stored in the 4621 * same bucket. If all the xattrs in this bucket have the same hash 4622 * value, the new bucket will be initialized as an empty one and the 4623 * first_hash will be initialized as (hash_value+1). 4624 */ 4625 static int ocfs2_divide_xattr_bucket(struct inode *inode, 4626 handle_t *handle, 4627 u64 blk, 4628 u64 new_blk, 4629 u32 *first_hash, 4630 int new_bucket_head) 4631 { 4632 int ret, i; 4633 int count, start, len, name_value_len = 0, name_offset = 0; 4634 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4635 struct ocfs2_xattr_header *xh; 4636 struct ocfs2_xattr_entry *xe; 4637 int blocksize = inode->i_sb->s_blocksize; 4638 4639 trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk, 4640 (unsigned long long)new_blk); 4641 4642 s_bucket = ocfs2_xattr_bucket_new(inode); 4643 t_bucket = ocfs2_xattr_bucket_new(inode); 4644 if (!s_bucket || !t_bucket) { 4645 ret = -ENOMEM; 4646 mlog_errno(ret); 4647 goto out; 4648 } 4649 4650 ret = ocfs2_read_xattr_bucket(s_bucket, blk); 4651 if (ret) { 4652 mlog_errno(ret); 4653 goto out; 4654 } 4655 4656 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, 4657 OCFS2_JOURNAL_ACCESS_WRITE); 4658 if (ret) { 4659 mlog_errno(ret); 4660 goto out; 4661 } 4662 4663 /* 4664 * Even if !new_bucket_head, we're overwriting t_bucket. Thus, 4665 * there's no need to read it. 4666 */ 4667 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head); 4668 if (ret) { 4669 mlog_errno(ret); 4670 goto out; 4671 } 4672 4673 /* 4674 * Hey, if we're overwriting t_bucket, what difference does 4675 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the 4676 * same part of ocfs2_cp_xattr_bucket(). 4677 */ 4678 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4679 new_bucket_head ? 
4680 OCFS2_JOURNAL_ACCESS_CREATE : 4681 OCFS2_JOURNAL_ACCESS_WRITE); 4682 if (ret) { 4683 mlog_errno(ret); 4684 goto out; 4685 } 4686 4687 xh = bucket_xh(s_bucket); 4688 count = le16_to_cpu(xh->xh_count); 4689 start = ocfs2_xattr_find_divide_pos(xh); 4690 4691 if (start == count) { 4692 xe = &xh->xh_entries[start-1]; 4693 4694 /* 4695 * initialized a new empty bucket here. 4696 * The hash value is set as one larger than 4697 * that of the last entry in the previous bucket. 4698 */ 4699 for (i = 0; i < t_bucket->bu_blocks; i++) 4700 memset(bucket_block(t_bucket, i), 0, blocksize); 4701 4702 xh = bucket_xh(t_bucket); 4703 xh->xh_free_start = cpu_to_le16(blocksize); 4704 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 4705 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 4706 4707 goto set_num_buckets; 4708 } 4709 4710 /* copy the whole bucket to the new first. */ 4711 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4712 4713 /* update the new bucket. */ 4714 xh = bucket_xh(t_bucket); 4715 4716 /* 4717 * Calculate the total name/value len and xh_free_start for 4718 * the old bucket first. 4719 */ 4720 name_offset = OCFS2_XATTR_BUCKET_SIZE; 4721 name_value_len = 0; 4722 for (i = 0; i < start; i++) { 4723 xe = &xh->xh_entries[i]; 4724 name_value_len += namevalue_size_xe(xe); 4725 if (le16_to_cpu(xe->xe_name_offset) < name_offset) 4726 name_offset = le16_to_cpu(xe->xe_name_offset); 4727 } 4728 4729 /* 4730 * Now begin the modification to the new bucket. 4731 * 4732 * In the new bucket, We just move the xattr entry to the beginning 4733 * and don't touch the name/value. So there will be some holes in the 4734 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is 4735 * called. 
4736 */ 4737 xe = &xh->xh_entries[start]; 4738 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4739 trace_ocfs2_divide_xattr_bucket_move(len, 4740 (int)((char *)xe - (char *)xh), 4741 (int)((char *)xh->xh_entries - (char *)xh)); 4742 memmove((char *)xh->xh_entries, (char *)xe, len); 4743 xe = &xh->xh_entries[count - start]; 4744 len = sizeof(struct ocfs2_xattr_entry) * start; 4745 memset((char *)xe, 0, len); 4746 4747 le16_add_cpu(&xh->xh_count, -start); 4748 le16_add_cpu(&xh->xh_name_value_len, -name_value_len); 4749 4750 /* Calculate xh_free_start for the new bucket. */ 4751 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 4752 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 4753 xe = &xh->xh_entries[i]; 4754 if (le16_to_cpu(xe->xe_name_offset) < 4755 le16_to_cpu(xh->xh_free_start)) 4756 xh->xh_free_start = xe->xe_name_offset; 4757 } 4758 4759 set_num_buckets: 4760 /* set xh->xh_num_buckets for the new xh. */ 4761 if (new_bucket_head) 4762 xh->xh_num_buckets = cpu_to_le16(1); 4763 else 4764 xh->xh_num_buckets = 0; 4765 4766 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4767 4768 /* store the first_hash of the new bucket. */ 4769 if (first_hash) 4770 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4771 4772 /* 4773 * Now only update the 1st block of the old bucket. If we 4774 * just added a new empty bucket, there is no need to modify 4775 * it. 4776 */ 4777 if (start == count) 4778 goto out; 4779 4780 xh = bucket_xh(s_bucket); 4781 memset(&xh->xh_entries[start], 0, 4782 sizeof(struct ocfs2_xattr_entry) * (count - start)); 4783 xh->xh_count = cpu_to_le16(start); 4784 xh->xh_free_start = cpu_to_le16(name_offset); 4785 xh->xh_name_value_len = cpu_to_le16(name_value_len); 4786 4787 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); 4788 4789 out: 4790 ocfs2_xattr_bucket_free(s_bucket); 4791 ocfs2_xattr_bucket_free(t_bucket); 4792 4793 return ret; 4794 } 4795 4796 /* 4797 * Copy xattr from one bucket to another bucket. 
4798 * 4799 * The caller must make sure that the journal transaction 4800 * has enough space for journaling. 4801 */ 4802 static int ocfs2_cp_xattr_bucket(struct inode *inode, 4803 handle_t *handle, 4804 u64 s_blkno, 4805 u64 t_blkno, 4806 int t_is_new) 4807 { 4808 int ret; 4809 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4810 4811 BUG_ON(s_blkno == t_blkno); 4812 4813 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno, 4814 (unsigned long long)t_blkno, 4815 t_is_new); 4816 4817 s_bucket = ocfs2_xattr_bucket_new(inode); 4818 t_bucket = ocfs2_xattr_bucket_new(inode); 4819 if (!s_bucket || !t_bucket) { 4820 ret = -ENOMEM; 4821 mlog_errno(ret); 4822 goto out; 4823 } 4824 4825 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); 4826 if (ret) 4827 goto out; 4828 4829 /* 4830 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4831 * there's no need to read it. 4832 */ 4833 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new); 4834 if (ret) 4835 goto out; 4836 4837 /* 4838 * Hey, if we're overwriting t_bucket, what difference does 4839 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new 4840 * cluster to fill, we came here from 4841 * ocfs2_mv_xattr_buckets(), and it is really new - 4842 * ACCESS_CREATE is required. But we also might have moved data 4843 * out of t_bucket before extending back into it. 4844 * ocfs2_add_new_xattr_bucket() can do this - its call to 4845 * ocfs2_add_new_xattr_cluster() may have created a new extent 4846 * and copied out the end of the old extent. Then it re-extends 4847 * the old extent back to create space for new xattrs. That's 4848 * how we get here, and the bucket isn't really new. 4849 */ 4850 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4851 t_is_new ? 
4852 OCFS2_JOURNAL_ACCESS_CREATE : 4853 OCFS2_JOURNAL_ACCESS_WRITE); 4854 if (ret) 4855 goto out; 4856 4857 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4858 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4859 4860 out: 4861 ocfs2_xattr_bucket_free(t_bucket); 4862 ocfs2_xattr_bucket_free(s_bucket); 4863 4864 return ret; 4865 } 4866 4867 /* 4868 * src_blk points to the start of an existing extent. last_blk points to 4869 * last cluster in that extent. to_blk points to a newly allocated 4870 * extent. We copy the buckets from the cluster at last_blk to the new 4871 * extent. If start_bucket is non-zero, we skip that many buckets before 4872 * we start copying. The new extent's xh_num_buckets gets set to the 4873 * number of buckets we copied. The old extent's xh_num_buckets shrinks 4874 * by the same amount. 4875 */ 4876 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 4877 u64 src_blk, u64 last_blk, u64 to_blk, 4878 unsigned int start_bucket, 4879 u32 *first_hash) 4880 { 4881 int i, ret, credits; 4882 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4883 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4884 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4885 struct ocfs2_xattr_bucket *old_first, *new_first; 4886 4887 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk, 4888 (unsigned long long)to_blk); 4889 4890 BUG_ON(start_bucket >= num_buckets); 4891 if (start_bucket) { 4892 num_buckets -= start_bucket; 4893 last_blk += (start_bucket * blks_per_bucket); 4894 } 4895 4896 /* The first bucket of the original extent */ 4897 old_first = ocfs2_xattr_bucket_new(inode); 4898 /* The first bucket of the new extent */ 4899 new_first = ocfs2_xattr_bucket_new(inode); 4900 if (!old_first || !new_first) { 4901 ret = -ENOMEM; 4902 mlog_errno(ret); 4903 goto out; 4904 } 4905 4906 ret = ocfs2_read_xattr_bucket(old_first, src_blk); 4907 if (ret) { 4908 mlog_errno(ret); 4909 goto out; 4910 } 4911 4912 /* 4913 * We 
need to update the first bucket of the old extent and all 4914 * the buckets going to the new extent. 4915 */ 4916 credits = ((num_buckets + 1) * blks_per_bucket); 4917 ret = ocfs2_extend_trans(handle, credits); 4918 if (ret) { 4919 mlog_errno(ret); 4920 goto out; 4921 } 4922 4923 ret = ocfs2_xattr_bucket_journal_access(handle, old_first, 4924 OCFS2_JOURNAL_ACCESS_WRITE); 4925 if (ret) { 4926 mlog_errno(ret); 4927 goto out; 4928 } 4929 4930 for (i = 0; i < num_buckets; i++) { 4931 ret = ocfs2_cp_xattr_bucket(inode, handle, 4932 last_blk + (i * blks_per_bucket), 4933 to_blk + (i * blks_per_bucket), 4934 1); 4935 if (ret) { 4936 mlog_errno(ret); 4937 goto out; 4938 } 4939 } 4940 4941 /* 4942 * Get the new bucket ready before we dirty anything 4943 * (This actually shouldn't fail, because we already dirtied 4944 * it once in ocfs2_cp_xattr_bucket()). 4945 */ 4946 ret = ocfs2_read_xattr_bucket(new_first, to_blk); 4947 if (ret) { 4948 mlog_errno(ret); 4949 goto out; 4950 } 4951 ret = ocfs2_xattr_bucket_journal_access(handle, new_first, 4952 OCFS2_JOURNAL_ACCESS_WRITE); 4953 if (ret) { 4954 mlog_errno(ret); 4955 goto out; 4956 } 4957 4958 /* Now update the headers */ 4959 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); 4960 ocfs2_xattr_bucket_journal_dirty(handle, old_first); 4961 4962 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); 4963 ocfs2_xattr_bucket_journal_dirty(handle, new_first); 4964 4965 if (first_hash) 4966 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); 4967 4968 out: 4969 ocfs2_xattr_bucket_free(new_first); 4970 ocfs2_xattr_bucket_free(old_first); 4971 return ret; 4972 } 4973 4974 /* 4975 * Move some xattrs in this cluster to the new cluster. 4976 * This function should only be called when bucket size == cluster size. 4977 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 
4978 */ 4979 static int ocfs2_divide_xattr_cluster(struct inode *inode, 4980 handle_t *handle, 4981 u64 prev_blk, 4982 u64 new_blk, 4983 u32 *first_hash) 4984 { 4985 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4986 int ret, credits = 2 * blk_per_bucket; 4987 4988 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4989 4990 ret = ocfs2_extend_trans(handle, credits); 4991 if (ret) { 4992 mlog_errno(ret); 4993 return ret; 4994 } 4995 4996 /* Move half of the xattr in start_blk to the next bucket. */ 4997 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, 4998 new_blk, first_hash, 1); 4999 } 5000 5001 /* 5002 * Move some xattrs from the old cluster to the new one since they are not 5003 * contiguous in ocfs2 xattr tree. 5004 * 5005 * new_blk starts a new separate cluster, and we will move some xattrs from 5006 * prev_blk to it. v_start will be set as the first name hash value in this 5007 * new cluster so that it can be used as e_cpos during tree insertion and 5008 * don't collide with our original b-tree operations. first_bh and header_bh 5009 * will also be updated since they will be used in ocfs2_extend_xattr_bucket 5010 * to extend the insert bucket. 5011 * 5012 * The problem is how much xattr should we move to the new one and when should 5013 * we update first_bh and header_bh? 5014 * 1. If cluster size > bucket size, that means the previous cluster has more 5015 * than 1 bucket, so just move half nums of bucket into the new cluster and 5016 * update the first_bh and header_bh if the insert bucket has been moved 5017 * to the new cluster. 5018 * 2. If cluster_size == bucket_size: 5019 * a) If the previous extent rec has more than one cluster and the insert 5020 * place isn't in the last cluster, copy the entire last cluster to the 5021 * new one. This time, we don't need to upate the first_bh and header_bh 5022 * since they will not be moved into the new cluster. 
5023 * b) Otherwise, move the bottom half of the xattrs in the last cluster into 5024 * the new one. And we set the extend flag to zero if the insert place is 5025 * moved into the new allocated cluster since no extend is needed. 5026 */ 5027 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 5028 handle_t *handle, 5029 struct ocfs2_xattr_bucket *first, 5030 struct ocfs2_xattr_bucket *target, 5031 u64 new_blk, 5032 u32 prev_clusters, 5033 u32 *v_start, 5034 int *extend) 5035 { 5036 int ret; 5037 5038 trace_ocfs2_adjust_xattr_cross_cluster( 5039 (unsigned long long)bucket_blkno(first), 5040 (unsigned long long)new_blk, prev_clusters); 5041 5042 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5043 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5044 handle, 5045 first, target, 5046 new_blk, 5047 prev_clusters, 5048 v_start); 5049 if (ret) 5050 mlog_errno(ret); 5051 } else { 5052 /* The start of the last cluster in the first extent */ 5053 u64 last_blk = bucket_blkno(first) + 5054 ((prev_clusters - 1) * 5055 ocfs2_clusters_to_blocks(inode->i_sb, 1)); 5056 5057 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { 5058 ret = ocfs2_mv_xattr_buckets(inode, handle, 5059 bucket_blkno(first), 5060 last_blk, new_blk, 0, 5061 v_start); 5062 if (ret) 5063 mlog_errno(ret); 5064 } else { 5065 ret = ocfs2_divide_xattr_cluster(inode, handle, 5066 last_blk, new_blk, 5067 v_start); 5068 if (ret) 5069 mlog_errno(ret); 5070 5071 if ((bucket_blkno(target) == last_blk) && extend) 5072 *extend = 0; 5073 } 5074 } 5075 5076 return ret; 5077 } 5078 5079 /* 5080 * Add a new cluster for xattr storage. 5081 * 5082 * If the new cluster is contiguous with the previous one, it will be 5083 * appended to the same extent record, and num_clusters will be updated. 5084 * If not, we will insert a new extent for it and move some xattrs in 5085 * the last cluster into the new allocated one. 
5086 * We also need to limit the maximum size of a btree leaf, otherwise we'll 5087 * lose the benefits of hashing because we'll have to search large leaves. 5088 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, 5089 * if it's bigger). 5090 * 5091 * first_bh is the first block of the previous extent rec and header_bh 5092 * indicates the bucket we will insert the new xattrs. They will be updated 5093 * when the header_bh is moved into the new cluster. 5094 */ 5095 static int ocfs2_add_new_xattr_cluster(struct inode *inode, 5096 struct buffer_head *root_bh, 5097 struct ocfs2_xattr_bucket *first, 5098 struct ocfs2_xattr_bucket *target, 5099 u32 *num_clusters, 5100 u32 prev_cpos, 5101 int *extend, 5102 struct ocfs2_xattr_set_ctxt *ctxt) 5103 { 5104 int ret; 5105 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 5106 u32 prev_clusters = *num_clusters; 5107 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 5108 u64 block; 5109 handle_t *handle = ctxt->handle; 5110 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5111 struct ocfs2_extent_tree et; 5112 5113 trace_ocfs2_add_new_xattr_cluster_begin( 5114 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5115 (unsigned long long)bucket_blkno(first), 5116 prev_cpos, prev_clusters); 5117 5118 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5119 5120 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5121 OCFS2_JOURNAL_ACCESS_WRITE); 5122 if (ret < 0) { 5123 mlog_errno(ret); 5124 goto leave; 5125 } 5126 5127 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1, 5128 clusters_to_add, &bit_off, &num_bits); 5129 if (ret < 0) { 5130 if (ret != -ENOSPC) 5131 mlog_errno(ret); 5132 goto leave; 5133 } 5134 5135 BUG_ON(num_bits > clusters_to_add); 5136 5137 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5138 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits); 5139 5140 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5141 
(prev_clusters + num_bits) << osb->s_clustersize_bits <= 5142 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 5143 /* 5144 * If this cluster is contiguous with the old one and 5145 * adding this new cluster, we don't surpass the limit of 5146 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be 5147 * initialized and used like other buckets in the previous 5148 * cluster. 5149 * So add it as a contiguous one. The caller will handle 5150 * its init process. 5151 */ 5152 v_start = prev_cpos + prev_clusters; 5153 *num_clusters = prev_clusters + num_bits; 5154 } else { 5155 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5156 handle, 5157 first, 5158 target, 5159 block, 5160 prev_clusters, 5161 &v_start, 5162 extend); 5163 if (ret) { 5164 mlog_errno(ret); 5165 goto leave; 5166 } 5167 } 5168 5169 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block, 5170 v_start, num_bits); 5171 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5172 num_bits, 0, ctxt->meta_ac); 5173 if (ret < 0) { 5174 mlog_errno(ret); 5175 goto leave; 5176 } 5177 5178 ocfs2_journal_dirty(handle, root_bh); 5179 5180 leave: 5181 return ret; 5182 } 5183 5184 /* 5185 * We are given an extent. 'first' is the bucket at the very front of 5186 * the extent. The extent has space for an additional bucket past 5187 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number 5188 * of the target bucket. We wish to shift every bucket past the target 5189 * down one, filling in that additional space. When we get back to the 5190 * target, we split the target between itself and the now-empty bucket 5191 * at target+1 (aka, target_blkno + blks_per_bucket). 
5192 */ 5193 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5194 handle_t *handle, 5195 struct ocfs2_xattr_bucket *first, 5196 u64 target_blk, 5197 u32 num_clusters) 5198 { 5199 int ret, credits; 5200 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5201 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5202 u64 end_blk; 5203 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5204 5205 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk, 5206 (unsigned long long)bucket_blkno(first), 5207 num_clusters, new_bucket); 5208 5209 /* The extent must have room for an additional bucket */ 5210 BUG_ON(new_bucket >= 5211 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5212 5213 /* end_blk points to the last existing bucket */ 5214 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5215 5216 /* 5217 * end_blk is the start of the last existing bucket. 5218 * Thus, (end_blk - target_blk) covers the target bucket and 5219 * every bucket after it up to, but not including, the last 5220 * existing bucket. Then we add the last existing bucket, the 5221 * new bucket, and the first bucket (3 * blk_per_bucket). 5222 */ 5223 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5224 ret = ocfs2_extend_trans(handle, credits); 5225 if (ret) { 5226 mlog_errno(ret); 5227 goto out; 5228 } 5229 5230 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5231 OCFS2_JOURNAL_ACCESS_WRITE); 5232 if (ret) { 5233 mlog_errno(ret); 5234 goto out; 5235 } 5236 5237 while (end_blk != target_blk) { 5238 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5239 end_blk + blk_per_bucket, 0); 5240 if (ret) 5241 goto out; 5242 end_blk -= blk_per_bucket; 5243 } 5244 5245 /* Move half of the xattr in target_blkno to the next bucket. 
*/ 5246 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5247 target_blk + blk_per_bucket, NULL, 0); 5248 5249 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5250 ocfs2_xattr_bucket_journal_dirty(handle, first); 5251 5252 out: 5253 return ret; 5254 } 5255 5256 /* 5257 * Add new xattr bucket in an extent record and adjust the buckets 5258 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5259 * bucket we want to insert into. 5260 * 5261 * In the easy case, we will move all the buckets after target down by 5262 * one. Half of target's xattrs will be moved to the next bucket. 5263 * 5264 * If current cluster is full, we'll allocate a new one. This may not 5265 * be contiguous. The underlying calls will make sure that there is 5266 * space for the insert, shifting buckets around if necessary. 5267 * 'target' may be moved by those calls. 5268 */ 5269 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5270 struct buffer_head *xb_bh, 5271 struct ocfs2_xattr_bucket *target, 5272 struct ocfs2_xattr_set_ctxt *ctxt) 5273 { 5274 struct ocfs2_xattr_block *xb = 5275 (struct ocfs2_xattr_block *)xb_bh->b_data; 5276 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5277 struct ocfs2_extent_list *el = &xb_root->xt_list; 5278 u32 name_hash = 5279 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5280 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5281 int ret, num_buckets, extend = 1; 5282 u64 p_blkno; 5283 u32 e_cpos, num_clusters; 5284 /* The bucket at the front of the extent */ 5285 struct ocfs2_xattr_bucket *first; 5286 5287 trace_ocfs2_add_new_xattr_bucket( 5288 (unsigned long long)bucket_blkno(target)); 5289 5290 /* The first bucket of the original extent */ 5291 first = ocfs2_xattr_bucket_new(inode); 5292 if (!first) { 5293 ret = -ENOMEM; 5294 mlog_errno(ret); 5295 goto out; 5296 } 5297 5298 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5299 &num_clusters, el); 5300 if (ret) { 5301 mlog_errno(ret); 
5302 goto out; 5303 } 5304 5305 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5306 if (ret) { 5307 mlog_errno(ret); 5308 goto out; 5309 } 5310 5311 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5312 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5313 /* 5314 * This can move first+target if the target bucket moves 5315 * to the new extent. 5316 */ 5317 ret = ocfs2_add_new_xattr_cluster(inode, 5318 xb_bh, 5319 first, 5320 target, 5321 &num_clusters, 5322 e_cpos, 5323 &extend, 5324 ctxt); 5325 if (ret) { 5326 mlog_errno(ret); 5327 goto out; 5328 } 5329 } 5330 5331 if (extend) { 5332 ret = ocfs2_extend_xattr_bucket(inode, 5333 ctxt->handle, 5334 first, 5335 bucket_blkno(target), 5336 num_clusters); 5337 if (ret) 5338 mlog_errno(ret); 5339 } 5340 5341 out: 5342 ocfs2_xattr_bucket_free(first); 5343 5344 return ret; 5345 } 5346 5347 /* 5348 * Truncate the specified xe_off entry in xattr bucket. 5349 * bucket is indicated by header_bh and len is the new length. 5350 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5351 * 5352 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 5353 */ 5354 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5355 struct ocfs2_xattr_bucket *bucket, 5356 int xe_off, 5357 int len, 5358 struct ocfs2_xattr_set_ctxt *ctxt) 5359 { 5360 int ret, offset; 5361 u64 value_blk; 5362 struct ocfs2_xattr_entry *xe; 5363 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5364 size_t blocksize = inode->i_sb->s_blocksize; 5365 struct ocfs2_xattr_value_buf vb = { 5366 .vb_access = ocfs2_journal_access, 5367 }; 5368 5369 xe = &xh->xh_entries[xe_off]; 5370 5371 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5372 5373 offset = le16_to_cpu(xe->xe_name_offset) + 5374 OCFS2_XATTR_SIZE(xe->xe_name_len); 5375 5376 value_blk = offset / blocksize; 5377 5378 /* We don't allow ocfs2_xattr_value to be stored in different block. 
*/ 5379 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5380 5381 vb.vb_bh = bucket->bu_bhs[value_blk]; 5382 BUG_ON(!vb.vb_bh); 5383 5384 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5385 (vb.vb_bh->b_data + offset % blocksize); 5386 5387 /* 5388 * From here on out we have to dirty the bucket. The generic 5389 * value calls only modify one of the bucket's bhs, but we need 5390 * to send the bucket at once. So if they error, they *could* have 5391 * modified something. We have to assume they did, and dirty 5392 * the whole bucket. This leaves us in a consistent state. 5393 */ 5394 trace_ocfs2_xattr_bucket_value_truncate( 5395 (unsigned long long)bucket_blkno(bucket), xe_off, len); 5396 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5397 if (ret) { 5398 mlog_errno(ret); 5399 goto out; 5400 } 5401 5402 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5403 OCFS2_JOURNAL_ACCESS_WRITE); 5404 if (ret) { 5405 mlog_errno(ret); 5406 goto out; 5407 } 5408 5409 xe->xe_value_size = cpu_to_le64(len); 5410 5411 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5412 5413 out: 5414 return ret; 5415 } 5416 5417 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5418 struct buffer_head *root_bh, 5419 u64 blkno, 5420 u32 cpos, 5421 u32 len, 5422 void *para) 5423 { 5424 int ret; 5425 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5426 struct inode *tl_inode = osb->osb_tl_inode; 5427 handle_t *handle; 5428 struct ocfs2_xattr_block *xb = 5429 (struct ocfs2_xattr_block *)root_bh->b_data; 5430 struct ocfs2_alloc_context *meta_ac = NULL; 5431 struct ocfs2_cached_dealloc_ctxt dealloc; 5432 struct ocfs2_extent_tree et; 5433 5434 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5435 ocfs2_delete_xattr_in_bucket, para); 5436 if (ret) { 5437 mlog_errno(ret); 5438 return ret; 5439 } 5440 5441 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5442 5443 ocfs2_init_dealloc_ctxt(&dealloc); 5444 5445 
trace_ocfs2_rm_xattr_cluster( 5446 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5447 (unsigned long long)blkno, cpos, len); 5448 5449 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5450 len); 5451 5452 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5453 if (ret) { 5454 mlog_errno(ret); 5455 return ret; 5456 } 5457 5458 inode_lock(tl_inode); 5459 5460 if (ocfs2_truncate_log_needs_flush(osb)) { 5461 ret = __ocfs2_flush_truncate_log(osb); 5462 if (ret < 0) { 5463 mlog_errno(ret); 5464 goto out; 5465 } 5466 } 5467 5468 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5469 if (IS_ERR(handle)) { 5470 ret = -ENOMEM; 5471 mlog_errno(ret); 5472 goto out; 5473 } 5474 5475 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5476 OCFS2_JOURNAL_ACCESS_WRITE); 5477 if (ret) { 5478 mlog_errno(ret); 5479 goto out_commit; 5480 } 5481 5482 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5483 &dealloc); 5484 if (ret) { 5485 mlog_errno(ret); 5486 goto out_commit; 5487 } 5488 5489 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5490 ocfs2_journal_dirty(handle, root_bh); 5491 5492 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5493 if (ret) 5494 mlog_errno(ret); 5495 ocfs2_update_inode_fsync_trans(handle, inode, 0); 5496 5497 out_commit: 5498 ocfs2_commit_trans(osb, handle); 5499 out: 5500 ocfs2_schedule_truncate_log_flush(osb, 1); 5501 5502 inode_unlock(tl_inode); 5503 5504 if (meta_ac) 5505 ocfs2_free_alloc_context(meta_ac); 5506 5507 ocfs2_run_deallocs(osb, &dealloc); 5508 5509 return ret; 5510 } 5511 5512 /* 5513 * check whether the xattr bucket is filled up with the same hash value. 5514 * If we want to insert the xattr with the same hash, return -ENOSPC. 5515 * If we want to insert a xattr with different hash value, go ahead 5516 * and ocfs2_divide_xattr_bucket will handle this. 
5517 */ 5518 static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 5519 struct ocfs2_xattr_bucket *bucket, 5520 const char *name) 5521 { 5522 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5523 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5524 5525 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5526 return 0; 5527 5528 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 5529 xh->xh_entries[0].xe_name_hash) { 5530 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5531 "hash = %u\n", 5532 (unsigned long long)bucket_blkno(bucket), 5533 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5534 return -ENOSPC; 5535 } 5536 5537 return 0; 5538 } 5539 5540 /* 5541 * Try to set the entry in the current bucket. If we fail, the caller 5542 * will handle getting us another bucket. 5543 */ 5544 static int ocfs2_xattr_set_entry_bucket(struct inode *inode, 5545 struct ocfs2_xattr_info *xi, 5546 struct ocfs2_xattr_search *xs, 5547 struct ocfs2_xattr_set_ctxt *ctxt) 5548 { 5549 int ret; 5550 struct ocfs2_xa_loc loc; 5551 5552 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name); 5553 5554 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5555 xs->not_found ? NULL : xs->here); 5556 ret = ocfs2_xa_set(&loc, xi, ctxt); 5557 if (!ret) { 5558 xs->here = loc.xl_entry; 5559 goto out; 5560 } 5561 if (ret != -ENOSPC) { 5562 mlog_errno(ret); 5563 goto out; 5564 } 5565 5566 /* Ok, we need space. Let's try defragmenting the bucket. 
*/ 5567 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5568 xs->bucket); 5569 if (ret) { 5570 mlog_errno(ret); 5571 goto out; 5572 } 5573 5574 ret = ocfs2_xa_set(&loc, xi, ctxt); 5575 if (!ret) { 5576 xs->here = loc.xl_entry; 5577 goto out; 5578 } 5579 if (ret != -ENOSPC) 5580 mlog_errno(ret); 5581 5582 5583 out: 5584 return ret; 5585 } 5586 5587 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5588 struct ocfs2_xattr_info *xi, 5589 struct ocfs2_xattr_search *xs, 5590 struct ocfs2_xattr_set_ctxt *ctxt) 5591 { 5592 int ret; 5593 5594 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name); 5595 5596 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5597 if (!ret) 5598 goto out; 5599 if (ret != -ENOSPC) { 5600 mlog_errno(ret); 5601 goto out; 5602 } 5603 5604 /* Ack, need more space. Let's try to get another bucket! */ 5605 5606 /* 5607 * We do not allow for overlapping ranges between buckets. And 5608 * the maximum number of collisions we will allow for then is 5609 * one bucket's worth, so check it here whether we need to 5610 * add a new bucket for the insert. 5611 */ 5612 ret = ocfs2_check_xattr_bucket_collision(inode, 5613 xs->bucket, 5614 xi->xi_name); 5615 if (ret) { 5616 mlog_errno(ret); 5617 goto out; 5618 } 5619 5620 ret = ocfs2_add_new_xattr_bucket(inode, 5621 xs->xattr_bh, 5622 xs->bucket, 5623 ctxt); 5624 if (ret) { 5625 mlog_errno(ret); 5626 goto out; 5627 } 5628 5629 /* 5630 * ocfs2_add_new_xattr_bucket() will have updated 5631 * xs->bucket if it moved, but it will not have updated 5632 * any of the other search fields. Thus, we drop it and 5633 * re-search. Everything should be cached, so it'll be 5634 * quick. 
5635 */ 5636 ocfs2_xattr_bucket_relse(xs->bucket); 5637 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5638 xi->xi_name_index, 5639 xi->xi_name, xs); 5640 if (ret && ret != -ENODATA) 5641 goto out; 5642 xs->not_found = ret; 5643 5644 /* Ok, we have a new bucket, let's try again */ 5645 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5646 if (ret && (ret != -ENOSPC)) 5647 mlog_errno(ret); 5648 5649 out: 5650 return ret; 5651 } 5652 5653 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5654 struct ocfs2_xattr_bucket *bucket, 5655 void *para) 5656 { 5657 int ret = 0, ref_credits; 5658 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5659 u16 i; 5660 struct ocfs2_xattr_entry *xe; 5661 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5662 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5663 int credits = ocfs2_remove_extent_credits(osb->sb) + 5664 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5665 struct ocfs2_xattr_value_root *xv; 5666 struct ocfs2_rm_xattr_bucket_para *args = 5667 (struct ocfs2_rm_xattr_bucket_para *)para; 5668 5669 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5670 5671 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5672 xe = &xh->xh_entries[i]; 5673 if (ocfs2_xattr_is_local(xe)) 5674 continue; 5675 5676 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5677 i, &xv, NULL); 5678 if (ret) { 5679 mlog_errno(ret); 5680 break; 5681 } 5682 5683 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5684 args->ref_ci, 5685 args->ref_root_bh, 5686 &ctxt.meta_ac, 5687 &ref_credits); 5688 5689 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5690 if (IS_ERR(ctxt.handle)) { 5691 ret = PTR_ERR(ctxt.handle); 5692 mlog_errno(ret); 5693 break; 5694 } 5695 5696 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5697 i, 0, &ctxt); 5698 5699 ocfs2_commit_trans(osb, ctxt.handle); 5700 if (ctxt.meta_ac) { 5701 ocfs2_free_alloc_context(ctxt.meta_ac); 5702 ctxt.meta_ac = NULL; 5703 } 5704 if (ret) { 5705 mlog_errno(ret); 
5706 break; 5707 } 5708 } 5709 5710 if (ctxt.meta_ac) 5711 ocfs2_free_alloc_context(ctxt.meta_ac); 5712 ocfs2_schedule_truncate_log_flush(osb, 1); 5713 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5714 return ret; 5715 } 5716 5717 /* 5718 * Whenever we modify a xattr value root in the bucket(e.g, CoW 5719 * or change the extent record flag), we need to recalculate 5720 * the metaecc for the whole bucket. So it is done here. 5721 * 5722 * Note: 5723 * We have to give the extra credits for the caller. 5724 */ 5725 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, 5726 handle_t *handle, 5727 void *para) 5728 { 5729 int ret; 5730 struct ocfs2_xattr_bucket *bucket = 5731 (struct ocfs2_xattr_bucket *)para; 5732 5733 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 5734 OCFS2_JOURNAL_ACCESS_WRITE); 5735 if (ret) { 5736 mlog_errno(ret); 5737 return ret; 5738 } 5739 5740 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 5741 5742 return 0; 5743 } 5744 5745 /* 5746 * Special action we need if the xattr value is refcounted. 5747 * 5748 * 1. If the xattr is refcounted, lock the tree. 5749 * 2. CoW the xattr if we are setting the new value and the value 5750 * will be stored outside. 5751 * 3. In other case, decrease_refcount will work for us, so just 5752 * lock the refcount tree, calculate the meta and credits is OK. 5753 * 5754 * We have to do CoW before ocfs2_init_xattr_set_ctxt since 5755 * currently CoW is a completed transaction, while this function 5756 * will also lock the allocators and let us deadlock. So we will 5757 * CoW the whole xattr value. 
5758 */ 5759 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5760 struct ocfs2_dinode *di, 5761 struct ocfs2_xattr_info *xi, 5762 struct ocfs2_xattr_search *xis, 5763 struct ocfs2_xattr_search *xbs, 5764 struct ocfs2_refcount_tree **ref_tree, 5765 int *meta_add, 5766 int *credits) 5767 { 5768 int ret = 0; 5769 struct ocfs2_xattr_block *xb; 5770 struct ocfs2_xattr_entry *xe; 5771 char *base; 5772 u32 p_cluster, num_clusters; 5773 unsigned int ext_flags; 5774 int name_offset, name_len; 5775 struct ocfs2_xattr_value_buf vb; 5776 struct ocfs2_xattr_bucket *bucket = NULL; 5777 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5778 struct ocfs2_post_refcount refcount; 5779 struct ocfs2_post_refcount *p = NULL; 5780 struct buffer_head *ref_root_bh = NULL; 5781 5782 if (!xis->not_found) { 5783 xe = xis->here; 5784 name_offset = le16_to_cpu(xe->xe_name_offset); 5785 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5786 base = xis->base; 5787 vb.vb_bh = xis->inode_bh; 5788 vb.vb_access = ocfs2_journal_access_di; 5789 } else { 5790 int i, block_off = 0; 5791 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5792 xe = xbs->here; 5793 name_offset = le16_to_cpu(xe->xe_name_offset); 5794 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5795 i = xbs->here - xbs->header->xh_entries; 5796 5797 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5798 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5799 bucket_xh(xbs->bucket), 5800 i, &block_off, 5801 &name_offset); 5802 if (ret) { 5803 mlog_errno(ret); 5804 goto out; 5805 } 5806 base = bucket_block(xbs->bucket, block_off); 5807 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5808 vb.vb_access = ocfs2_journal_access; 5809 5810 if (ocfs2_meta_ecc(osb)) { 5811 /*create parameters for ocfs2_post_refcount. 
*/ 5812 bucket = xbs->bucket; 5813 refcount.credits = bucket->bu_blocks; 5814 refcount.para = bucket; 5815 refcount.func = 5816 ocfs2_xattr_bucket_post_refcount; 5817 p = &refcount; 5818 } 5819 } else { 5820 base = xbs->base; 5821 vb.vb_bh = xbs->xattr_bh; 5822 vb.vb_access = ocfs2_journal_access_xb; 5823 } 5824 } 5825 5826 if (ocfs2_xattr_is_local(xe)) 5827 goto out; 5828 5829 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5830 (base + name_offset + name_len); 5831 5832 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5833 &num_clusters, &vb.vb_xv->xr_list, 5834 &ext_flags); 5835 if (ret) { 5836 mlog_errno(ret); 5837 goto out; 5838 } 5839 5840 /* 5841 * We just need to check the 1st extent record, since we always 5842 * CoW the whole xattr. So there shouldn't be a xattr with 5843 * some REFCOUNT extent recs after the 1st one. 5844 */ 5845 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5846 goto out; 5847 5848 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5849 1, ref_tree, &ref_root_bh); 5850 if (ret) { 5851 mlog_errno(ret); 5852 goto out; 5853 } 5854 5855 /* 5856 * If we are deleting the xattr or the new size will be stored inside, 5857 * cool, leave it there, the xattr truncate process will remove them 5858 * for us(it still needs the refcount tree lock and the meta, credits). 5859 * And the worse case is that every cluster truncate will split the 5860 * refcount tree, and make the original extent become 3. So we will need 5861 * 2 * cluster more extent recs at most. 
5862 */ 5863 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5864 5865 ret = ocfs2_refcounted_xattr_delete_need(inode, 5866 &(*ref_tree)->rf_ci, 5867 ref_root_bh, vb.vb_xv, 5868 meta_add, credits); 5869 if (ret) 5870 mlog_errno(ret); 5871 goto out; 5872 } 5873 5874 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5875 *ref_tree, ref_root_bh, 0, 5876 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5877 if (ret) 5878 mlog_errno(ret); 5879 5880 out: 5881 brelse(ref_root_bh); 5882 return ret; 5883 } 5884 5885 /* 5886 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5887 * The physical clusters will be added to refcount tree. 5888 */ 5889 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5890 struct ocfs2_xattr_value_root *xv, 5891 struct ocfs2_extent_tree *value_et, 5892 struct ocfs2_caching_info *ref_ci, 5893 struct buffer_head *ref_root_bh, 5894 struct ocfs2_cached_dealloc_ctxt *dealloc, 5895 struct ocfs2_post_refcount *refcount) 5896 { 5897 int ret = 0; 5898 u32 clusters = le32_to_cpu(xv->xr_clusters); 5899 u32 cpos, p_cluster, num_clusters; 5900 struct ocfs2_extent_list *el = &xv->xr_list; 5901 unsigned int ext_flags; 5902 5903 cpos = 0; 5904 while (cpos < clusters) { 5905 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5906 &num_clusters, el, &ext_flags); 5907 if (ret) { 5908 mlog_errno(ret); 5909 break; 5910 } 5911 5912 cpos += num_clusters; 5913 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5914 continue; 5915 5916 BUG_ON(!p_cluster); 5917 5918 ret = ocfs2_add_refcount_flag(inode, value_et, 5919 ref_ci, ref_root_bh, 5920 cpos - num_clusters, 5921 p_cluster, num_clusters, 5922 dealloc, refcount); 5923 if (ret) { 5924 mlog_errno(ret); 5925 break; 5926 } 5927 } 5928 5929 return ret; 5930 } 5931 5932 /* 5933 * Given a normal ocfs2_xattr_header, refcount all the entries which 5934 * have value stored outside. 5935 * Used for xattrs stored in inode and ocfs2_xattr_block. 
5936 */ 5937 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5938 struct ocfs2_xattr_value_buf *vb, 5939 struct ocfs2_xattr_header *header, 5940 struct ocfs2_caching_info *ref_ci, 5941 struct buffer_head *ref_root_bh, 5942 struct ocfs2_cached_dealloc_ctxt *dealloc) 5943 { 5944 5945 struct ocfs2_xattr_entry *xe; 5946 struct ocfs2_xattr_value_root *xv; 5947 struct ocfs2_extent_tree et; 5948 int i, ret = 0; 5949 5950 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5951 xe = &header->xh_entries[i]; 5952 5953 if (ocfs2_xattr_is_local(xe)) 5954 continue; 5955 5956 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5957 le16_to_cpu(xe->xe_name_offset) + 5958 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5959 5960 vb->vb_xv = xv; 5961 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5962 5963 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 5964 ref_ci, ref_root_bh, 5965 dealloc, NULL); 5966 if (ret) { 5967 mlog_errno(ret); 5968 break; 5969 } 5970 } 5971 5972 return ret; 5973 } 5974 5975 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 5976 struct buffer_head *fe_bh, 5977 struct ocfs2_caching_info *ref_ci, 5978 struct buffer_head *ref_root_bh, 5979 struct ocfs2_cached_dealloc_ctxt *dealloc) 5980 { 5981 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 5982 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) 5983 (fe_bh->b_data + inode->i_sb->s_blocksize - 5984 le16_to_cpu(di->i_xattr_inline_size)); 5985 struct ocfs2_xattr_value_buf vb = { 5986 .vb_bh = fe_bh, 5987 .vb_access = ocfs2_journal_access_di, 5988 }; 5989 5990 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 5991 ref_ci, ref_root_bh, dealloc); 5992 } 5993 5994 struct ocfs2_xattr_tree_value_refcount_para { 5995 struct ocfs2_caching_info *ref_ci; 5996 struct buffer_head *ref_root_bh; 5997 struct ocfs2_cached_dealloc_ctxt *dealloc; 5998 }; 5999 6000 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 
6001 struct ocfs2_xattr_bucket *bucket, 6002 int offset, 6003 struct ocfs2_xattr_value_root **xv, 6004 struct buffer_head **bh) 6005 { 6006 int ret, block_off, name_offset; 6007 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 6008 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6009 void *base; 6010 6011 ret = ocfs2_xattr_bucket_get_name_value(sb, 6012 bucket_xh(bucket), 6013 offset, 6014 &block_off, 6015 &name_offset); 6016 if (ret) { 6017 mlog_errno(ret); 6018 goto out; 6019 } 6020 6021 base = bucket_block(bucket, block_off); 6022 6023 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6024 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6025 6026 if (bh) 6027 *bh = bucket->bu_bhs[block_off]; 6028 out: 6029 return ret; 6030 } 6031 6032 /* 6033 * For a given xattr bucket, refcount all the entries which 6034 * have value stored outside. 6035 */ 6036 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6037 struct ocfs2_xattr_bucket *bucket, 6038 void *para) 6039 { 6040 int i, ret = 0; 6041 struct ocfs2_extent_tree et; 6042 struct ocfs2_xattr_tree_value_refcount_para *ref = 6043 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6044 struct ocfs2_xattr_header *xh = 6045 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6046 struct ocfs2_xattr_entry *xe; 6047 struct ocfs2_xattr_value_buf vb = { 6048 .vb_access = ocfs2_journal_access, 6049 }; 6050 struct ocfs2_post_refcount refcount = { 6051 .credits = bucket->bu_blocks, 6052 .para = bucket, 6053 .func = ocfs2_xattr_bucket_post_refcount, 6054 }; 6055 struct ocfs2_post_refcount *p = NULL; 6056 6057 /* We only need post_refcount if we support metaecc. 
*/ 6058 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6059 p = &refcount; 6060 6061 trace_ocfs2_xattr_bucket_value_refcount( 6062 (unsigned long long)bucket_blkno(bucket), 6063 le16_to_cpu(xh->xh_count)); 6064 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6065 xe = &xh->xh_entries[i]; 6066 6067 if (ocfs2_xattr_is_local(xe)) 6068 continue; 6069 6070 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6071 &vb.vb_xv, &vb.vb_bh); 6072 if (ret) { 6073 mlog_errno(ret); 6074 break; 6075 } 6076 6077 ocfs2_init_xattr_value_extent_tree(&et, 6078 INODE_CACHE(inode), &vb); 6079 6080 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6081 &et, ref->ref_ci, 6082 ref->ref_root_bh, 6083 ref->dealloc, p); 6084 if (ret) { 6085 mlog_errno(ret); 6086 break; 6087 } 6088 } 6089 6090 return ret; 6091 6092 } 6093 6094 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6095 struct buffer_head *root_bh, 6096 u64 blkno, u32 cpos, u32 len, void *para) 6097 { 6098 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6099 ocfs2_xattr_bucket_value_refcount, 6100 para); 6101 } 6102 6103 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6104 struct buffer_head *blk_bh, 6105 struct ocfs2_caching_info *ref_ci, 6106 struct buffer_head *ref_root_bh, 6107 struct ocfs2_cached_dealloc_ctxt *dealloc) 6108 { 6109 int ret = 0; 6110 struct ocfs2_xattr_block *xb = 6111 (struct ocfs2_xattr_block *)blk_bh->b_data; 6112 6113 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6114 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6115 struct ocfs2_xattr_value_buf vb = { 6116 .vb_bh = blk_bh, 6117 .vb_access = ocfs2_journal_access_xb, 6118 }; 6119 6120 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6121 ref_ci, ref_root_bh, 6122 dealloc); 6123 } else { 6124 struct ocfs2_xattr_tree_value_refcount_para para = { 6125 .ref_ci = ref_ci, 6126 .ref_root_bh = ref_root_bh, 6127 .dealloc = dealloc, 6128 }; 6129 6130 ret = 
ocfs2_iterate_xattr_index_block(inode, blk_bh, 6131 ocfs2_refcount_xattr_tree_rec, 6132 ¶); 6133 } 6134 6135 return ret; 6136 } 6137 6138 int ocfs2_xattr_attach_refcount_tree(struct inode *inode, 6139 struct buffer_head *fe_bh, 6140 struct ocfs2_caching_info *ref_ci, 6141 struct buffer_head *ref_root_bh, 6142 struct ocfs2_cached_dealloc_ctxt *dealloc) 6143 { 6144 int ret = 0; 6145 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6146 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6147 struct buffer_head *blk_bh = NULL; 6148 6149 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 6150 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, 6151 ref_ci, ref_root_bh, 6152 dealloc); 6153 if (ret) { 6154 mlog_errno(ret); 6155 goto out; 6156 } 6157 } 6158 6159 if (!di->i_xattr_loc) 6160 goto out; 6161 6162 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 6163 &blk_bh); 6164 if (ret < 0) { 6165 mlog_errno(ret); 6166 goto out; 6167 } 6168 6169 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, 6170 ref_root_bh, dealloc); 6171 if (ret) 6172 mlog_errno(ret); 6173 6174 brelse(blk_bh); 6175 out: 6176 6177 return ret; 6178 } 6179 6180 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); 6181 /* 6182 * Store the information we need in xattr reflink. 6183 * old_bh and new_bh are inode bh for the old and new inode. 6184 */ 6185 struct ocfs2_xattr_reflink { 6186 struct inode *old_inode; 6187 struct inode *new_inode; 6188 struct buffer_head *old_bh; 6189 struct buffer_head *new_bh; 6190 struct ocfs2_caching_info *ref_ci; 6191 struct buffer_head *ref_root_bh; 6192 struct ocfs2_cached_dealloc_ctxt *dealloc; 6193 should_xattr_reflinked *xattr_reflinked; 6194 }; 6195 6196 /* 6197 * Given a xattr header and xe offset, 6198 * return the proper xv and the corresponding bh. 6199 * xattr in inode, block and xattr tree have different implementaions. 
6200 */ 6201 typedef int (get_xattr_value_root)(struct super_block *sb, 6202 struct buffer_head *bh, 6203 struct ocfs2_xattr_header *xh, 6204 int offset, 6205 struct ocfs2_xattr_value_root **xv, 6206 struct buffer_head **ret_bh, 6207 void *para); 6208 6209 /* 6210 * Calculate all the xattr value root metadata stored in this xattr header and 6211 * credits we need if we create them from the scratch. 6212 * We use get_xattr_value_root so that all types of xattr container can use it. 6213 */ 6214 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, 6215 struct buffer_head *bh, 6216 struct ocfs2_xattr_header *xh, 6217 int *metas, int *credits, 6218 int *num_recs, 6219 get_xattr_value_root *func, 6220 void *para) 6221 { 6222 int i, ret = 0; 6223 struct ocfs2_xattr_value_root *xv; 6224 struct ocfs2_xattr_entry *xe; 6225 6226 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6227 xe = &xh->xh_entries[i]; 6228 if (ocfs2_xattr_is_local(xe)) 6229 continue; 6230 6231 ret = func(sb, bh, xh, i, &xv, NULL, para); 6232 if (ret) { 6233 mlog_errno(ret); 6234 break; 6235 } 6236 6237 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * 6238 le16_to_cpu(xv->xr_list.l_next_free_rec); 6239 6240 *credits += ocfs2_calc_extend_credits(sb, 6241 &def_xv.xv.xr_list); 6242 6243 /* 6244 * If the value is a tree with depth > 1, We don't go deep 6245 * to the extent block, so just calculate a maximum record num. 6246 */ 6247 if (!xv->xr_list.l_tree_depth) 6248 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); 6249 else 6250 *num_recs += ocfs2_clusters_for_bytes(sb, 6251 XATTR_SIZE_MAX); 6252 } 6253 6254 return ret; 6255 } 6256 6257 /* Used by xattr inode and block to return the right xv and buffer_head. 
*/ 6258 static int ocfs2_get_xattr_value_root(struct super_block *sb, 6259 struct buffer_head *bh, 6260 struct ocfs2_xattr_header *xh, 6261 int offset, 6262 struct ocfs2_xattr_value_root **xv, 6263 struct buffer_head **ret_bh, 6264 void *para) 6265 { 6266 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6267 6268 *xv = (struct ocfs2_xattr_value_root *)((void *)xh + 6269 le16_to_cpu(xe->xe_name_offset) + 6270 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6271 6272 if (ret_bh) 6273 *ret_bh = bh; 6274 6275 return 0; 6276 } 6277 6278 /* 6279 * Lock the meta_ac and caculate how much credits we need for reflink xattrs. 6280 * It is only used for inline xattr and xattr block. 6281 */ 6282 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, 6283 struct ocfs2_xattr_header *xh, 6284 struct buffer_head *ref_root_bh, 6285 int *credits, 6286 struct ocfs2_alloc_context **meta_ac) 6287 { 6288 int ret, meta_add = 0, num_recs = 0; 6289 struct ocfs2_refcount_block *rb = 6290 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 6291 6292 *credits = 0; 6293 6294 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh, 6295 &meta_add, credits, &num_recs, 6296 ocfs2_get_xattr_value_root, 6297 NULL); 6298 if (ret) { 6299 mlog_errno(ret); 6300 goto out; 6301 } 6302 6303 /* 6304 * We need to add/modify num_recs in refcount tree, so just calculate 6305 * an approximate number we need for refcount tree change. 6306 * Sometimes we need to split the tree, and after split, half recs 6307 * will be moved to the new block, and a new block can only provide 6308 * half number of recs. So we multiple new blocks by 2. 
6309 */ 6310 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6311 meta_add += num_recs; 6312 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6313 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6314 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6315 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6316 else 6317 *credits += 1; 6318 6319 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); 6320 if (ret) 6321 mlog_errno(ret); 6322 6323 out: 6324 return ret; 6325 } 6326 6327 /* 6328 * Given a xattr header, reflink all the xattrs in this container. 6329 * It can be used for inode, block and bucket. 6330 * 6331 * NOTE: 6332 * Before we call this function, the caller has memcpy the xattr in 6333 * old_xh to the new_xh. 6334 * 6335 * If args.xattr_reflinked is set, call it to decide whether the xe should 6336 * be reflinked or not. If not, remove it from the new xattr header. 6337 */ 6338 static int ocfs2_reflink_xattr_header(handle_t *handle, 6339 struct ocfs2_xattr_reflink *args, 6340 struct buffer_head *old_bh, 6341 struct ocfs2_xattr_header *xh, 6342 struct buffer_head *new_bh, 6343 struct ocfs2_xattr_header *new_xh, 6344 struct ocfs2_xattr_value_buf *vb, 6345 struct ocfs2_alloc_context *meta_ac, 6346 get_xattr_value_root *func, 6347 void *para) 6348 { 6349 int ret = 0, i, j; 6350 struct super_block *sb = args->old_inode->i_sb; 6351 struct buffer_head *value_bh; 6352 struct ocfs2_xattr_entry *xe, *last; 6353 struct ocfs2_xattr_value_root *xv, *new_xv; 6354 struct ocfs2_extent_tree data_et; 6355 u32 clusters, cpos, p_cluster, num_clusters; 6356 unsigned int ext_flags = 0; 6357 6358 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr, 6359 le16_to_cpu(xh->xh_count)); 6360 6361 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; 6362 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6363 xe = &xh->xh_entries[i]; 6364 6365 if (args->xattr_reflinked && 
!args->xattr_reflinked(xe)) { 6366 xe = &new_xh->xh_entries[j]; 6367 6368 le16_add_cpu(&new_xh->xh_count, -1); 6369 if (new_xh->xh_count) { 6370 memmove(xe, xe + 1, 6371 (void *)last - (void *)xe); 6372 memset(last, 0, 6373 sizeof(struct ocfs2_xattr_entry)); 6374 } 6375 6376 /* 6377 * We don't want j to increase in the next round since 6378 * it is already moved ahead. 6379 */ 6380 j--; 6381 continue; 6382 } 6383 6384 if (ocfs2_xattr_is_local(xe)) 6385 continue; 6386 6387 ret = func(sb, old_bh, xh, i, &xv, NULL, para); 6388 if (ret) { 6389 mlog_errno(ret); 6390 break; 6391 } 6392 6393 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); 6394 if (ret) { 6395 mlog_errno(ret); 6396 break; 6397 } 6398 6399 /* 6400 * For the xattr which has l_tree_depth = 0, all the extent 6401 * recs have already be copied to the new xh with the 6402 * propriate OCFS2_EXT_REFCOUNTED flag we just need to 6403 * increase the refount count int the refcount tree. 6404 * 6405 * For the xattr which has l_tree_depth > 0, we need 6406 * to initialize it to the empty default value root, 6407 * and then insert the extents one by one. 
6408 */ 6409 if (xv->xr_list.l_tree_depth) { 6410 memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE); 6411 vb->vb_xv = new_xv; 6412 vb->vb_bh = value_bh; 6413 ocfs2_init_xattr_value_extent_tree(&data_et, 6414 INODE_CACHE(args->new_inode), vb); 6415 } 6416 6417 clusters = le32_to_cpu(xv->xr_clusters); 6418 cpos = 0; 6419 while (cpos < clusters) { 6420 ret = ocfs2_xattr_get_clusters(args->old_inode, 6421 cpos, 6422 &p_cluster, 6423 &num_clusters, 6424 &xv->xr_list, 6425 &ext_flags); 6426 if (ret) { 6427 mlog_errno(ret); 6428 goto out; 6429 } 6430 6431 BUG_ON(!p_cluster); 6432 6433 if (xv->xr_list.l_tree_depth) { 6434 ret = ocfs2_insert_extent(handle, 6435 &data_et, cpos, 6436 ocfs2_clusters_to_blocks( 6437 args->old_inode->i_sb, 6438 p_cluster), 6439 num_clusters, ext_flags, 6440 meta_ac); 6441 if (ret) { 6442 mlog_errno(ret); 6443 goto out; 6444 } 6445 } 6446 6447 ret = ocfs2_increase_refcount(handle, args->ref_ci, 6448 args->ref_root_bh, 6449 p_cluster, num_clusters, 6450 meta_ac, args->dealloc); 6451 if (ret) { 6452 mlog_errno(ret); 6453 goto out; 6454 } 6455 6456 cpos += num_clusters; 6457 } 6458 } 6459 6460 out: 6461 return ret; 6462 } 6463 6464 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) 6465 { 6466 int ret = 0, credits = 0; 6467 handle_t *handle; 6468 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); 6469 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; 6470 int inline_size = le16_to_cpu(di->i_xattr_inline_size); 6471 int header_off = osb->sb->s_blocksize - inline_size; 6472 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *) 6473 (args->old_bh->b_data + header_off); 6474 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *) 6475 (args->new_bh->b_data + header_off); 6476 struct ocfs2_alloc_context *meta_ac = NULL; 6477 struct ocfs2_inode_info *new_oi; 6478 struct ocfs2_dinode *new_di; 6479 struct ocfs2_xattr_value_buf vb = { 6480 .vb_bh = args->new_bh, 6481 .vb_access = 
ocfs2_journal_access_di, 6482 }; 6483 6484 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6485 &credits, &meta_ac); 6486 if (ret) { 6487 mlog_errno(ret); 6488 goto out; 6489 } 6490 6491 handle = ocfs2_start_trans(osb, credits); 6492 if (IS_ERR(handle)) { 6493 ret = PTR_ERR(handle); 6494 mlog_errno(ret); 6495 goto out; 6496 } 6497 6498 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), 6499 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6500 if (ret) { 6501 mlog_errno(ret); 6502 goto out_commit; 6503 } 6504 6505 memcpy(args->new_bh->b_data + header_off, 6506 args->old_bh->b_data + header_off, inline_size); 6507 6508 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6509 new_di->i_xattr_inline_size = cpu_to_le16(inline_size); 6510 6511 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, 6512 args->new_bh, new_xh, &vb, meta_ac, 6513 ocfs2_get_xattr_value_root, NULL); 6514 if (ret) { 6515 mlog_errno(ret); 6516 goto out_commit; 6517 } 6518 6519 new_oi = OCFS2_I(args->new_inode); 6520 /* 6521 * Adjust extent record count to reserve space for extended attribute. 6522 * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). 
6523 */ 6524 if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && 6525 !(ocfs2_inode_is_fast_symlink(args->new_inode))) { 6526 struct ocfs2_extent_list *el = &new_di->id2.i_list; 6527 le16_add_cpu(&el->l_count, -(inline_size / 6528 sizeof(struct ocfs2_extent_rec))); 6529 } 6530 spin_lock(&new_oi->ip_lock); 6531 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; 6532 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6533 spin_unlock(&new_oi->ip_lock); 6534 6535 ocfs2_journal_dirty(handle, args->new_bh); 6536 6537 out_commit: 6538 ocfs2_commit_trans(osb, handle); 6539 6540 out: 6541 if (meta_ac) 6542 ocfs2_free_alloc_context(meta_ac); 6543 return ret; 6544 } 6545 6546 static int ocfs2_create_empty_xattr_block(struct inode *inode, 6547 struct buffer_head *fe_bh, 6548 struct buffer_head **ret_bh, 6549 int indexed) 6550 { 6551 int ret; 6552 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6553 struct ocfs2_xattr_set_ctxt ctxt; 6554 6555 memset(&ctxt, 0, sizeof(ctxt)); 6556 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); 6557 if (ret < 0) { 6558 mlog_errno(ret); 6559 return ret; 6560 } 6561 6562 ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); 6563 if (IS_ERR(ctxt.handle)) { 6564 ret = PTR_ERR(ctxt.handle); 6565 mlog_errno(ret); 6566 goto out; 6567 } 6568 6569 trace_ocfs2_create_empty_xattr_block( 6570 (unsigned long long)fe_bh->b_blocknr, indexed); 6571 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6572 ret_bh); 6573 if (ret) 6574 mlog_errno(ret); 6575 6576 ocfs2_commit_trans(osb, ctxt.handle); 6577 out: 6578 ocfs2_free_alloc_context(ctxt.meta_ac); 6579 return ret; 6580 } 6581 6582 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args, 6583 struct buffer_head *blk_bh, 6584 struct buffer_head *new_blk_bh) 6585 { 6586 int ret = 0, credits = 0; 6587 handle_t *handle; 6588 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode); 6589 struct ocfs2_dinode *new_di; 
6590 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb); 6591 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 6592 struct ocfs2_xattr_block *xb = 6593 (struct ocfs2_xattr_block *)blk_bh->b_data; 6594 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header; 6595 struct ocfs2_xattr_block *new_xb = 6596 (struct ocfs2_xattr_block *)new_blk_bh->b_data; 6597 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header; 6598 struct ocfs2_alloc_context *meta_ac; 6599 struct ocfs2_xattr_value_buf vb = { 6600 .vb_bh = new_blk_bh, 6601 .vb_access = ocfs2_journal_access_xb, 6602 }; 6603 6604 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6605 &credits, &meta_ac); 6606 if (ret) { 6607 mlog_errno(ret); 6608 return ret; 6609 } 6610 6611 /* One more credits in case we need to add xattr flags in new inode. */ 6612 handle = ocfs2_start_trans(osb, credits + 1); 6613 if (IS_ERR(handle)) { 6614 ret = PTR_ERR(handle); 6615 mlog_errno(ret); 6616 goto out; 6617 } 6618 6619 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6620 ret = ocfs2_journal_access_di(handle, 6621 INODE_CACHE(args->new_inode), 6622 args->new_bh, 6623 OCFS2_JOURNAL_ACCESS_WRITE); 6624 if (ret) { 6625 mlog_errno(ret); 6626 goto out_commit; 6627 } 6628 } 6629 6630 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode), 6631 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6632 if (ret) { 6633 mlog_errno(ret); 6634 goto out_commit; 6635 } 6636 6637 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off, 6638 osb->sb->s_blocksize - header_off); 6639 6640 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh, 6641 new_blk_bh, new_xh, &vb, meta_ac, 6642 ocfs2_get_xattr_value_root, NULL); 6643 if (ret) { 6644 mlog_errno(ret); 6645 goto out_commit; 6646 } 6647 6648 ocfs2_journal_dirty(handle, new_blk_bh); 6649 6650 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6651 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6652 
spin_lock(&new_oi->ip_lock); 6653 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 6654 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6655 spin_unlock(&new_oi->ip_lock); 6656 6657 ocfs2_journal_dirty(handle, args->new_bh); 6658 } 6659 6660 out_commit: 6661 ocfs2_commit_trans(osb, handle); 6662 6663 out: 6664 ocfs2_free_alloc_context(meta_ac); 6665 return ret; 6666 } 6667 6668 struct ocfs2_reflink_xattr_tree_args { 6669 struct ocfs2_xattr_reflink *reflink; 6670 struct buffer_head *old_blk_bh; 6671 struct buffer_head *new_blk_bh; 6672 struct ocfs2_xattr_bucket *old_bucket; 6673 struct ocfs2_xattr_bucket *new_bucket; 6674 }; 6675 6676 /* 6677 * NOTE: 6678 * We have to handle the case that both old bucket and new bucket 6679 * will call this function to get the right ret_bh. 6680 * So The caller must give us the right bh. 6681 */ 6682 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb, 6683 struct buffer_head *bh, 6684 struct ocfs2_xattr_header *xh, 6685 int offset, 6686 struct ocfs2_xattr_value_root **xv, 6687 struct buffer_head **ret_bh, 6688 void *para) 6689 { 6690 struct ocfs2_reflink_xattr_tree_args *args = 6691 (struct ocfs2_reflink_xattr_tree_args *)para; 6692 struct ocfs2_xattr_bucket *bucket; 6693 6694 if (bh == args->old_bucket->bu_bhs[0]) 6695 bucket = args->old_bucket; 6696 else 6697 bucket = args->new_bucket; 6698 6699 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6700 xv, ret_bh); 6701 } 6702 6703 struct ocfs2_value_tree_metas { 6704 int num_metas; 6705 int credits; 6706 int num_recs; 6707 }; 6708 6709 static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb, 6710 struct buffer_head *bh, 6711 struct ocfs2_xattr_header *xh, 6712 int offset, 6713 struct ocfs2_xattr_value_root **xv, 6714 struct buffer_head **ret_bh, 6715 void *para) 6716 { 6717 struct ocfs2_xattr_bucket *bucket = 6718 (struct ocfs2_xattr_bucket *)para; 6719 6720 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6721 xv, 
ret_bh); 6722 } 6723 6724 static int ocfs2_calc_value_tree_metas(struct inode *inode, 6725 struct ocfs2_xattr_bucket *bucket, 6726 void *para) 6727 { 6728 struct ocfs2_value_tree_metas *metas = 6729 (struct ocfs2_value_tree_metas *)para; 6730 struct ocfs2_xattr_header *xh = 6731 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6732 6733 /* Add the credits for this bucket first. */ 6734 metas->credits += bucket->bu_blocks; 6735 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0], 6736 xh, &metas->num_metas, 6737 &metas->credits, &metas->num_recs, 6738 ocfs2_value_tree_metas_in_bucket, 6739 bucket); 6740 } 6741 6742 /* 6743 * Given a xattr extent rec starting from blkno and having len clusters, 6744 * iterate all the buckets calculate how much metadata we need for reflinking 6745 * all the ocfs2_xattr_value_root and lock the allocators accordingly. 6746 */ 6747 static int ocfs2_lock_reflink_xattr_rec_allocators( 6748 struct ocfs2_reflink_xattr_tree_args *args, 6749 struct ocfs2_extent_tree *xt_et, 6750 u64 blkno, u32 len, int *credits, 6751 struct ocfs2_alloc_context **meta_ac, 6752 struct ocfs2_alloc_context **data_ac) 6753 { 6754 int ret, num_free_extents; 6755 struct ocfs2_value_tree_metas metas; 6756 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb); 6757 struct ocfs2_refcount_block *rb; 6758 6759 memset(&metas, 0, sizeof(metas)); 6760 6761 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len, 6762 ocfs2_calc_value_tree_metas, &metas); 6763 if (ret) { 6764 mlog_errno(ret); 6765 goto out; 6766 } 6767 6768 *credits = metas.credits; 6769 6770 /* 6771 * Calculate we need for refcount tree change. 6772 * 6773 * We need to add/modify num_recs in refcount tree, so just calculate 6774 * an approximate number we need for refcount tree change. 6775 * Sometimes we need to split the tree, and after split, half recs 6776 * will be moved to the new block, and a new block can only provide 6777 * half number of recs. 
So we multiple new blocks by 2. 6778 * In the end, we have to add credits for modifying the already 6779 * existed refcount block. 6780 */ 6781 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data; 6782 metas.num_recs = 6783 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) / 6784 ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6785 metas.num_metas += metas.num_recs; 6786 *credits += metas.num_recs + 6787 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6788 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6789 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6790 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6791 else 6792 *credits += 1; 6793 6794 /* count in the xattr tree change. */ 6795 num_free_extents = ocfs2_num_free_extents(xt_et); 6796 if (num_free_extents < 0) { 6797 ret = num_free_extents; 6798 mlog_errno(ret); 6799 goto out; 6800 } 6801 6802 if (num_free_extents < len) 6803 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); 6804 6805 *credits += ocfs2_calc_extend_credits(osb->sb, 6806 xt_et->et_root_el); 6807 6808 if (metas.num_metas) { 6809 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, 6810 meta_ac); 6811 if (ret) { 6812 mlog_errno(ret); 6813 goto out; 6814 } 6815 } 6816 6817 if (len) { 6818 ret = ocfs2_reserve_clusters(osb, len, data_ac); 6819 if (ret) 6820 mlog_errno(ret); 6821 } 6822 out: 6823 if (ret) { 6824 if (*meta_ac) { 6825 ocfs2_free_alloc_context(*meta_ac); 6826 *meta_ac = NULL; 6827 } 6828 } 6829 6830 return ret; 6831 } 6832 6833 static int ocfs2_reflink_xattr_bucket(handle_t *handle, 6834 u64 blkno, u64 new_blkno, u32 clusters, 6835 u32 *cpos, int num_buckets, 6836 struct ocfs2_alloc_context *meta_ac, 6837 struct ocfs2_alloc_context *data_ac, 6838 struct ocfs2_reflink_xattr_tree_args *args) 6839 { 6840 int i, j, ret = 0; 6841 struct super_block *sb = args->reflink->old_inode->i_sb; 6842 int bpb = args->old_bucket->bu_blocks; 6843 struct ocfs2_xattr_value_buf vb = { 6844 
.vb_access = ocfs2_journal_access, 6845 }; 6846 6847 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) { 6848 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6849 if (ret) { 6850 mlog_errno(ret); 6851 break; 6852 } 6853 6854 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1); 6855 if (ret) { 6856 mlog_errno(ret); 6857 break; 6858 } 6859 6860 ret = ocfs2_xattr_bucket_journal_access(handle, 6861 args->new_bucket, 6862 OCFS2_JOURNAL_ACCESS_CREATE); 6863 if (ret) { 6864 mlog_errno(ret); 6865 break; 6866 } 6867 6868 for (j = 0; j < bpb; j++) 6869 memcpy(bucket_block(args->new_bucket, j), 6870 bucket_block(args->old_bucket, j), 6871 sb->s_blocksize); 6872 6873 /* 6874 * Record the start cpos so that we can use it to initialize 6875 * our xattr tree we also set the xh_num_bucket for the new 6876 * bucket. 6877 */ 6878 if (i == 0) { 6879 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> 6880 xh_entries[0].xe_name_hash); 6881 bucket_xh(args->new_bucket)->xh_num_buckets = 6882 cpu_to_le16(num_buckets); 6883 } 6884 6885 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6886 6887 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6888 args->old_bucket->bu_bhs[0], 6889 bucket_xh(args->old_bucket), 6890 args->new_bucket->bu_bhs[0], 6891 bucket_xh(args->new_bucket), 6892 &vb, meta_ac, 6893 ocfs2_get_reflink_xattr_value_root, 6894 args); 6895 if (ret) { 6896 mlog_errno(ret); 6897 break; 6898 } 6899 6900 /* 6901 * Re-access and dirty the bucket to calculate metaecc. 6902 * Because we may extend the transaction in reflink_xattr_header 6903 * which will let the already accessed block gone. 
6904 */ 6905 ret = ocfs2_xattr_bucket_journal_access(handle, 6906 args->new_bucket, 6907 OCFS2_JOURNAL_ACCESS_WRITE); 6908 if (ret) { 6909 mlog_errno(ret); 6910 break; 6911 } 6912 6913 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6914 6915 ocfs2_xattr_bucket_relse(args->old_bucket); 6916 ocfs2_xattr_bucket_relse(args->new_bucket); 6917 } 6918 6919 ocfs2_xattr_bucket_relse(args->old_bucket); 6920 ocfs2_xattr_bucket_relse(args->new_bucket); 6921 return ret; 6922 } 6923 6924 static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6925 struct inode *inode, 6926 struct ocfs2_reflink_xattr_tree_args *args, 6927 struct ocfs2_extent_tree *et, 6928 struct ocfs2_alloc_context *meta_ac, 6929 struct ocfs2_alloc_context *data_ac, 6930 u64 blkno, u32 cpos, u32 len) 6931 { 6932 int ret, first_inserted = 0; 6933 u32 p_cluster, num_clusters, reflink_cpos = 0; 6934 u64 new_blkno; 6935 unsigned int num_buckets, reflink_buckets; 6936 unsigned int bpc = 6937 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 6938 6939 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6940 if (ret) { 6941 mlog_errno(ret); 6942 goto out; 6943 } 6944 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); 6945 ocfs2_xattr_bucket_relse(args->old_bucket); 6946 6947 while (len && num_buckets) { 6948 ret = ocfs2_claim_clusters(handle, data_ac, 6949 1, &p_cluster, &num_clusters); 6950 if (ret) { 6951 mlog_errno(ret); 6952 goto out; 6953 } 6954 6955 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 6956 reflink_buckets = min(num_buckets, bpc * num_clusters); 6957 6958 ret = ocfs2_reflink_xattr_bucket(handle, blkno, 6959 new_blkno, num_clusters, 6960 &reflink_cpos, reflink_buckets, 6961 meta_ac, data_ac, args); 6962 if (ret) { 6963 mlog_errno(ret); 6964 goto out; 6965 } 6966 6967 /* 6968 * For the 1st allocated cluster, we make it use the same cpos 6969 * so that the xattr tree looks the same as the original one 6970 * in the most case. 
6971 */ 6972 if (!first_inserted) { 6973 reflink_cpos = cpos; 6974 first_inserted = 1; 6975 } 6976 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 6977 num_clusters, 0, meta_ac); 6978 if (ret) 6979 mlog_errno(ret); 6980 6981 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno, 6982 num_clusters, reflink_cpos); 6983 6984 len -= num_clusters; 6985 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6986 num_buckets -= reflink_buckets; 6987 } 6988 out: 6989 return ret; 6990 } 6991 6992 /* 6993 * Create the same xattr extent record in the new inode's xattr tree. 6994 */ 6995 static int ocfs2_reflink_xattr_rec(struct inode *inode, 6996 struct buffer_head *root_bh, 6997 u64 blkno, 6998 u32 cpos, 6999 u32 len, 7000 void *para) 7001 { 7002 int ret, credits = 0; 7003 handle_t *handle; 7004 struct ocfs2_reflink_xattr_tree_args *args = 7005 (struct ocfs2_reflink_xattr_tree_args *)para; 7006 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7007 struct ocfs2_alloc_context *meta_ac = NULL; 7008 struct ocfs2_alloc_context *data_ac = NULL; 7009 struct ocfs2_extent_tree et; 7010 7011 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len); 7012 7013 ocfs2_init_xattr_tree_extent_tree(&et, 7014 INODE_CACHE(args->reflink->new_inode), 7015 args->new_blk_bh); 7016 7017 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 7018 len, &credits, 7019 &meta_ac, &data_ac); 7020 if (ret) { 7021 mlog_errno(ret); 7022 goto out; 7023 } 7024 7025 handle = ocfs2_start_trans(osb, credits); 7026 if (IS_ERR(handle)) { 7027 ret = PTR_ERR(handle); 7028 mlog_errno(ret); 7029 goto out; 7030 } 7031 7032 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7033 meta_ac, data_ac, 7034 blkno, cpos, len); 7035 if (ret) 7036 mlog_errno(ret); 7037 7038 ocfs2_commit_trans(osb, handle); 7039 7040 out: 7041 if (meta_ac) 7042 ocfs2_free_alloc_context(meta_ac); 7043 if (data_ac) 7044 ocfs2_free_alloc_context(data_ac); 7045 return ret; 7046 } 7047 7048 /* 
7049 * Create reflinked xattr buckets. 7050 * We will add bucket one by one, and refcount all the xattrs in the bucket 7051 * if they are stored outside. 7052 */ 7053 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7054 struct buffer_head *blk_bh, 7055 struct buffer_head *new_blk_bh) 7056 { 7057 int ret; 7058 struct ocfs2_reflink_xattr_tree_args para; 7059 7060 memset(¶, 0, sizeof(para)); 7061 para.reflink = args; 7062 para.old_blk_bh = blk_bh; 7063 para.new_blk_bh = new_blk_bh; 7064 7065 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7066 if (!para.old_bucket) { 7067 mlog_errno(-ENOMEM); 7068 return -ENOMEM; 7069 } 7070 7071 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7072 if (!para.new_bucket) { 7073 ret = -ENOMEM; 7074 mlog_errno(ret); 7075 goto out; 7076 } 7077 7078 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7079 ocfs2_reflink_xattr_rec, 7080 ¶); 7081 if (ret) 7082 mlog_errno(ret); 7083 7084 out: 7085 ocfs2_xattr_bucket_free(para.old_bucket); 7086 ocfs2_xattr_bucket_free(para.new_bucket); 7087 return ret; 7088 } 7089 7090 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7091 struct buffer_head *blk_bh) 7092 { 7093 int ret, indexed = 0; 7094 struct buffer_head *new_blk_bh = NULL; 7095 struct ocfs2_xattr_block *xb = 7096 (struct ocfs2_xattr_block *)blk_bh->b_data; 7097 7098 7099 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7100 indexed = 1; 7101 7102 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7103 &new_blk_bh, indexed); 7104 if (ret) { 7105 mlog_errno(ret); 7106 goto out; 7107 } 7108 7109 if (!indexed) 7110 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7111 else 7112 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7113 if (ret) 7114 mlog_errno(ret); 7115 7116 out: 7117 brelse(new_blk_bh); 7118 return ret; 7119 } 7120 7121 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7122 { 7123 int type = 
ocfs2_xattr_get_type(xe); 7124 7125 return type != OCFS2_XATTR_INDEX_SECURITY && 7126 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && 7127 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; 7128 } 7129 7130 int ocfs2_reflink_xattrs(struct inode *old_inode, 7131 struct buffer_head *old_bh, 7132 struct inode *new_inode, 7133 struct buffer_head *new_bh, 7134 bool preserve_security) 7135 { 7136 int ret; 7137 struct ocfs2_xattr_reflink args; 7138 struct ocfs2_inode_info *oi = OCFS2_I(old_inode); 7139 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; 7140 struct buffer_head *blk_bh = NULL; 7141 struct ocfs2_cached_dealloc_ctxt dealloc; 7142 struct ocfs2_refcount_tree *ref_tree; 7143 struct buffer_head *ref_root_bh = NULL; 7144 7145 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7146 le64_to_cpu(di->i_refcount_loc), 7147 1, &ref_tree, &ref_root_bh); 7148 if (ret) { 7149 mlog_errno(ret); 7150 goto out; 7151 } 7152 7153 ocfs2_init_dealloc_ctxt(&dealloc); 7154 7155 args.old_inode = old_inode; 7156 args.new_inode = new_inode; 7157 args.old_bh = old_bh; 7158 args.new_bh = new_bh; 7159 args.ref_ci = &ref_tree->rf_ci; 7160 args.ref_root_bh = ref_root_bh; 7161 args.dealloc = &dealloc; 7162 if (preserve_security) 7163 args.xattr_reflinked = NULL; 7164 else 7165 args.xattr_reflinked = ocfs2_reflink_xattr_no_security; 7166 7167 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 7168 ret = ocfs2_reflink_xattr_inline(&args); 7169 if (ret) { 7170 mlog_errno(ret); 7171 goto out_unlock; 7172 } 7173 } 7174 7175 if (!di->i_xattr_loc) 7176 goto out_unlock; 7177 7178 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), 7179 &blk_bh); 7180 if (ret < 0) { 7181 mlog_errno(ret); 7182 goto out_unlock; 7183 } 7184 7185 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); 7186 if (ret) 7187 mlog_errno(ret); 7188 7189 brelse(blk_bh); 7190 7191 out_unlock: 7192 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7193 ref_tree, 1); 7194 brelse(ref_root_bh); 7195 7196 
if (ocfs2_dealloc_has_cluster(&dealloc)) { 7197 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); 7198 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); 7199 } 7200 7201 out: 7202 return ret; 7203 } 7204 7205 /* 7206 * Initialize security and acl for a already created inode. 7207 * Used for reflink a non-preserve-security file. 7208 * 7209 * It uses common api like ocfs2_xattr_set, so the caller 7210 * must not hold any lock expect i_mutex. 7211 */ 7212 int ocfs2_init_security_and_acl(struct inode *dir, 7213 struct inode *inode, 7214 const struct qstr *qstr) 7215 { 7216 int ret = 0; 7217 struct buffer_head *dir_bh = NULL; 7218 7219 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7220 if (ret) { 7221 mlog_errno(ret); 7222 goto leave; 7223 } 7224 7225 ret = ocfs2_inode_lock(dir, &dir_bh, 0); 7226 if (ret) { 7227 mlog_errno(ret); 7228 goto leave; 7229 } 7230 ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); 7231 if (ret) 7232 mlog_errno(ret); 7233 7234 ocfs2_inode_unlock(dir, 0); 7235 brelse(dir_bh); 7236 leave: 7237 return ret; 7238 } 7239 7240 /* 7241 * 'security' attributes support 7242 */ 7243 static int ocfs2_xattr_security_get(const struct xattr_handler *handler, 7244 struct dentry *unused, struct inode *inode, 7245 const char *name, void *buffer, size_t size) 7246 { 7247 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, 7248 name, buffer, size); 7249 } 7250 7251 static int ocfs2_xattr_security_set(const struct xattr_handler *handler, 7252 struct user_namespace *mnt_userns, 7253 struct dentry *unused, struct inode *inode, 7254 const char *name, const void *value, 7255 size_t size, int flags) 7256 { 7257 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7258 name, value, size, flags); 7259 } 7260 7261 static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 7262 void *fs_info) 7263 { 7264 const struct xattr *xattr; 7265 int err = 0; 7266 7267 for (xattr = xattr_array; 
xattr->name != NULL; xattr++) { 7268 err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7269 xattr->name, xattr->value, 7270 xattr->value_len, XATTR_CREATE); 7271 if (err) 7272 break; 7273 } 7274 return err; 7275 } 7276 7277 int ocfs2_init_security_get(struct inode *inode, 7278 struct inode *dir, 7279 const struct qstr *qstr, 7280 struct ocfs2_security_xattr_info *si) 7281 { 7282 /* check whether ocfs2 support feature xattr */ 7283 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7284 return -EOPNOTSUPP; 7285 if (si) 7286 return security_old_inode_init_security(inode, dir, qstr, 7287 &si->name, &si->value, 7288 &si->value_len); 7289 7290 return security_inode_init_security(inode, dir, qstr, 7291 &ocfs2_initxattrs, NULL); 7292 } 7293 7294 int ocfs2_init_security_set(handle_t *handle, 7295 struct inode *inode, 7296 struct buffer_head *di_bh, 7297 struct ocfs2_security_xattr_info *si, 7298 struct ocfs2_alloc_context *xattr_ac, 7299 struct ocfs2_alloc_context *data_ac) 7300 { 7301 return ocfs2_xattr_set_handle(handle, inode, di_bh, 7302 OCFS2_XATTR_INDEX_SECURITY, 7303 si->name, si->value, si->value_len, 0, 7304 xattr_ac, data_ac); 7305 } 7306 7307 const struct xattr_handler ocfs2_xattr_security_handler = { 7308 .prefix = XATTR_SECURITY_PREFIX, 7309 .get = ocfs2_xattr_security_get, 7310 .set = ocfs2_xattr_security_set, 7311 }; 7312 7313 /* 7314 * 'trusted' attributes support 7315 */ 7316 static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, 7317 struct dentry *unused, struct inode *inode, 7318 const char *name, void *buffer, size_t size) 7319 { 7320 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, 7321 name, buffer, size); 7322 } 7323 7324 static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler, 7325 struct user_namespace *mnt_userns, 7326 struct dentry *unused, struct inode *inode, 7327 const char *name, const void *value, 7328 size_t size, int flags) 7329 { 7330 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, 
7331 name, value, size, flags); 7332 } 7333 7334 const struct xattr_handler ocfs2_xattr_trusted_handler = { 7335 .prefix = XATTR_TRUSTED_PREFIX, 7336 .get = ocfs2_xattr_trusted_get, 7337 .set = ocfs2_xattr_trusted_set, 7338 }; 7339 7340 /* 7341 * 'user' attributes support 7342 */ 7343 static int ocfs2_xattr_user_get(const struct xattr_handler *handler, 7344 struct dentry *unused, struct inode *inode, 7345 const char *name, void *buffer, size_t size) 7346 { 7347 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7348 7349 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7350 return -EOPNOTSUPP; 7351 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, 7352 buffer, size); 7353 } 7354 7355 static int ocfs2_xattr_user_set(const struct xattr_handler *handler, 7356 struct user_namespace *mnt_userns, 7357 struct dentry *unused, struct inode *inode, 7358 const char *name, const void *value, 7359 size_t size, int flags) 7360 { 7361 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7362 7363 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7364 return -EOPNOTSUPP; 7365 7366 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, 7367 name, value, size, flags); 7368 } 7369 7370 const struct xattr_handler ocfs2_xattr_user_handler = { 7371 .prefix = XATTR_USER_PREFIX, 7372 .get = ocfs2_xattr_user_get, 7373 .set = ocfs2_xattr_user_set, 7374 }; 7375