1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * xattr.c 5 * 6 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 7 * 8 * CREDITS: 9 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public 14 * License version 2 as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 */ 21 22 #include <linux/capability.h> 23 #include <linux/fs.h> 24 #include <linux/types.h> 25 #include <linux/slab.h> 26 #include <linux/highmem.h> 27 #include <linux/pagemap.h> 28 #include <linux/uio.h> 29 #include <linux/sched.h> 30 #include <linux/splice.h> 31 #include <linux/mount.h> 32 #include <linux/writeback.h> 33 #include <linux/falloc.h> 34 #include <linux/sort.h> 35 #include <linux/init.h> 36 #include <linux/module.h> 37 #include <linux/string.h> 38 #include <linux/security.h> 39 40 #include <cluster/masklog.h> 41 42 #include "ocfs2.h" 43 #include "alloc.h" 44 #include "blockcheck.h" 45 #include "dlmglue.h" 46 #include "file.h" 47 #include "symlink.h" 48 #include "sysfile.h" 49 #include "inode.h" 50 #include "journal.h" 51 #include "ocfs2_fs.h" 52 #include "suballoc.h" 53 #include "uptodate.h" 54 #include "buffer_head_io.h" 55 #include "super.h" 56 #include "xattr.h" 57 #include "refcounttree.h" 58 #include "acl.h" 59 #include "ocfs2_trace.h" 60 61 struct ocfs2_xattr_def_value_root { 62 struct ocfs2_xattr_value_root xv; 63 struct ocfs2_extent_rec er; 64 }; 65 66 struct ocfs2_xattr_bucket { 67 /* The inode these xattrs are associated with */ 68 struct inode *bu_inode; 69 70 /* The actual buffers that 
make up the bucket */ 71 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 72 73 /* How many blocks make up one bucket for this filesystem */ 74 int bu_blocks; 75 }; 76 77 struct ocfs2_xattr_set_ctxt { 78 handle_t *handle; 79 struct ocfs2_alloc_context *meta_ac; 80 struct ocfs2_alloc_context *data_ac; 81 struct ocfs2_cached_dealloc_ctxt dealloc; 82 int set_abort; 83 }; 84 85 #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 86 #define OCFS2_XATTR_INLINE_SIZE 80 87 #define OCFS2_XATTR_HEADER_GAP 4 88 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 89 - sizeof(struct ocfs2_xattr_header) \ 90 - OCFS2_XATTR_HEADER_GAP) 91 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 92 - sizeof(struct ocfs2_xattr_block) \ 93 - sizeof(struct ocfs2_xattr_header) \ 94 - OCFS2_XATTR_HEADER_GAP) 95 96 static struct ocfs2_xattr_def_value_root def_xv = { 97 .xv.xr_list.l_count = cpu_to_le16(1), 98 }; 99 100 const struct xattr_handler *ocfs2_xattr_handlers[] = { 101 &ocfs2_xattr_user_handler, 102 &posix_acl_access_xattr_handler, 103 &posix_acl_default_xattr_handler, 104 &ocfs2_xattr_trusted_handler, 105 &ocfs2_xattr_security_handler, 106 NULL 107 }; 108 109 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 110 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 111 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 112 = &posix_acl_access_xattr_handler, 113 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 114 = &posix_acl_default_xattr_handler, 115 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 116 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 117 }; 118 119 struct ocfs2_xattr_info { 120 int xi_name_index; 121 const char *xi_name; 122 int xi_name_len; 123 const void *xi_value; 124 size_t xi_value_len; 125 }; 126 127 struct ocfs2_xattr_search { 128 struct buffer_head *inode_bh; 129 /* 130 * xattr_bh point to the block buffer head which has extended attribute 131 * when extended 
attribute in inode, xattr_bh is equal to inode_bh. 132 */ 133 struct buffer_head *xattr_bh; 134 struct ocfs2_xattr_header *header; 135 struct ocfs2_xattr_bucket *bucket; 136 void *base; 137 void *end; 138 struct ocfs2_xattr_entry *here; 139 int not_found; 140 }; 141 142 /* Operations on struct ocfs2_xa_entry */ 143 struct ocfs2_xa_loc; 144 struct ocfs2_xa_loc_operations { 145 /* 146 * Journal functions 147 */ 148 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 149 int type); 150 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 151 152 /* 153 * Return a pointer to the appropriate buffer in loc->xl_storage 154 * at the given offset from loc->xl_header. 155 */ 156 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 157 158 /* Can we reuse the existing entry for the new value? */ 159 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 160 struct ocfs2_xattr_info *xi); 161 162 /* How much space is needed for the new value? */ 163 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 164 struct ocfs2_xattr_info *xi); 165 166 /* 167 * Return the offset of the first name+value pair. This is 168 * the start of our downward-filling free space. 169 */ 170 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 171 172 /* 173 * Remove the name+value at this location. Do whatever is 174 * appropriate with the remaining name+value pairs. 175 */ 176 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 177 178 /* Fill xl_entry with a new entry */ 179 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 180 181 /* Add name+value storage to an entry */ 182 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 183 184 /* 185 * Initialize the value buf's access and bh fields for this entry. 186 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 187 */ 188 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 189 struct ocfs2_xattr_value_buf *vb); 190 }; 191 192 /* 193 * Describes an xattr entry location. 
This is a memory structure 194 * tracking the on-disk structure. 195 */ 196 struct ocfs2_xa_loc { 197 /* This xattr belongs to this inode */ 198 struct inode *xl_inode; 199 200 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 201 struct ocfs2_xattr_header *xl_header; 202 203 /* Bytes from xl_header to the end of the storage */ 204 int xl_size; 205 206 /* 207 * The ocfs2_xattr_entry this location describes. If this is 208 * NULL, this location describes the on-disk structure where it 209 * would have been. 210 */ 211 struct ocfs2_xattr_entry *xl_entry; 212 213 /* 214 * Internal housekeeping 215 */ 216 217 /* Buffer(s) containing this entry */ 218 void *xl_storage; 219 220 /* Operations on the storage backing this location */ 221 const struct ocfs2_xa_loc_operations *xl_ops; 222 }; 223 224 /* 225 * Convenience functions to calculate how much space is needed for a 226 * given name+value pair 227 */ 228 static int namevalue_size(int name_len, uint64_t value_len) 229 { 230 if (value_len > OCFS2_XATTR_INLINE_SIZE) 231 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 232 else 233 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 234 } 235 236 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 237 { 238 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 239 } 240 241 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 242 { 243 u64 value_len = le64_to_cpu(xe->xe_value_size); 244 245 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 246 ocfs2_xattr_is_local(xe)); 247 return namevalue_size(xe->xe_name_len, value_len); 248 } 249 250 251 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 252 struct ocfs2_xattr_header *xh, 253 int index, 254 int *block_off, 255 int *new_offset); 256 257 static int ocfs2_xattr_block_find(struct inode *inode, 258 int name_index, 259 const char *name, 260 struct ocfs2_xattr_search *xs); 261 static int ocfs2_xattr_index_block_find(struct inode *inode, 262 struct 
buffer_head *root_bh, 263 int name_index, 264 const char *name, 265 struct ocfs2_xattr_search *xs); 266 267 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 268 struct buffer_head *blk_bh, 269 char *buffer, 270 size_t buffer_size); 271 272 static int ocfs2_xattr_create_index_block(struct inode *inode, 273 struct ocfs2_xattr_search *xs, 274 struct ocfs2_xattr_set_ctxt *ctxt); 275 276 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 277 struct ocfs2_xattr_info *xi, 278 struct ocfs2_xattr_search *xs, 279 struct ocfs2_xattr_set_ctxt *ctxt); 280 281 typedef int (xattr_tree_rec_func)(struct inode *inode, 282 struct buffer_head *root_bh, 283 u64 blkno, u32 cpos, u32 len, void *para); 284 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 285 struct buffer_head *root_bh, 286 xattr_tree_rec_func *rec_func, 287 void *para); 288 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 289 struct ocfs2_xattr_bucket *bucket, 290 void *para); 291 static int ocfs2_rm_xattr_cluster(struct inode *inode, 292 struct buffer_head *root_bh, 293 u64 blkno, 294 u32 cpos, 295 u32 len, 296 void *para); 297 298 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 299 u64 src_blk, u64 last_blk, u64 to_blk, 300 unsigned int start_bucket, 301 u32 *first_hash); 302 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 303 struct ocfs2_dinode *di, 304 struct ocfs2_xattr_info *xi, 305 struct ocfs2_xattr_search *xis, 306 struct ocfs2_xattr_search *xbs, 307 struct ocfs2_refcount_tree **ref_tree, 308 int *meta_need, 309 int *credits); 310 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 311 struct ocfs2_xattr_bucket *bucket, 312 int offset, 313 struct ocfs2_xattr_value_root **xv, 314 struct buffer_head **bh); 315 316 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 317 { 318 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 319 } 320 321 static inline u16 
ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 322 { 323 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 324 } 325 326 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 327 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 328 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 329 330 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 331 { 332 struct ocfs2_xattr_bucket *bucket; 333 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 334 335 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 336 337 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); 338 if (bucket) { 339 bucket->bu_inode = inode; 340 bucket->bu_blocks = blks; 341 } 342 343 return bucket; 344 } 345 346 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 347 { 348 int i; 349 350 for (i = 0; i < bucket->bu_blocks; i++) { 351 brelse(bucket->bu_bhs[i]); 352 bucket->bu_bhs[i] = NULL; 353 } 354 } 355 356 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 357 { 358 if (bucket) { 359 ocfs2_xattr_bucket_relse(bucket); 360 bucket->bu_inode = NULL; 361 kfree(bucket); 362 } 363 } 364 365 /* 366 * A bucket that has never been written to disk doesn't need to be 367 * read. We just need the buffer_heads. Don't call this for 368 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 369 * them fully. 
370 */ 371 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 372 u64 xb_blkno, int new) 373 { 374 int i, rc = 0; 375 376 for (i = 0; i < bucket->bu_blocks; i++) { 377 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 378 xb_blkno + i); 379 if (!bucket->bu_bhs[i]) { 380 rc = -ENOMEM; 381 mlog_errno(rc); 382 break; 383 } 384 385 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 386 bucket->bu_bhs[i])) { 387 if (new) 388 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 389 bucket->bu_bhs[i]); 390 else { 391 set_buffer_uptodate(bucket->bu_bhs[i]); 392 ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 393 bucket->bu_bhs[i]); 394 } 395 } 396 } 397 398 if (rc) 399 ocfs2_xattr_bucket_relse(bucket); 400 return rc; 401 } 402 403 /* Read the xattr bucket at xb_blkno */ 404 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 405 u64 xb_blkno) 406 { 407 int rc; 408 409 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 410 bucket->bu_blocks, bucket->bu_bhs, 0, 411 NULL); 412 if (!rc) { 413 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 414 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 415 bucket->bu_bhs, 416 bucket->bu_blocks, 417 &bucket_xh(bucket)->xh_check); 418 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 419 if (rc) 420 mlog_errno(rc); 421 } 422 423 if (rc) 424 ocfs2_xattr_bucket_relse(bucket); 425 return rc; 426 } 427 428 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 429 struct ocfs2_xattr_bucket *bucket, 430 int type) 431 { 432 int i, rc = 0; 433 434 for (i = 0; i < bucket->bu_blocks; i++) { 435 rc = ocfs2_journal_access(handle, 436 INODE_CACHE(bucket->bu_inode), 437 bucket->bu_bhs[i], type); 438 if (rc) { 439 mlog_errno(rc); 440 break; 441 } 442 } 443 444 return rc; 445 } 446 447 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 448 struct ocfs2_xattr_bucket *bucket) 449 { 450 int i; 451 452 
spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 453 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 454 bucket->bu_bhs, bucket->bu_blocks, 455 &bucket_xh(bucket)->xh_check); 456 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 457 458 for (i = 0; i < bucket->bu_blocks; i++) 459 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 460 } 461 462 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 463 struct ocfs2_xattr_bucket *src) 464 { 465 int i; 466 int blocksize = src->bu_inode->i_sb->s_blocksize; 467 468 BUG_ON(dest->bu_blocks != src->bu_blocks); 469 BUG_ON(dest->bu_inode != src->bu_inode); 470 471 for (i = 0; i < src->bu_blocks; i++) { 472 memcpy(bucket_block(dest, i), bucket_block(src, i), 473 blocksize); 474 } 475 } 476 477 static int ocfs2_validate_xattr_block(struct super_block *sb, 478 struct buffer_head *bh) 479 { 480 int rc; 481 struct ocfs2_xattr_block *xb = 482 (struct ocfs2_xattr_block *)bh->b_data; 483 484 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); 485 486 BUG_ON(!buffer_uptodate(bh)); 487 488 /* 489 * If the ecc fails, we return the error but otherwise 490 * leave the filesystem running. We know any error is 491 * local to this block. 
492 */ 493 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 494 if (rc) 495 return rc; 496 497 /* 498 * Errors after here are fatal 499 */ 500 501 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 502 ocfs2_error(sb, 503 "Extended attribute block #%llu has bad " 504 "signature %.*s", 505 (unsigned long long)bh->b_blocknr, 7, 506 xb->xb_signature); 507 return -EINVAL; 508 } 509 510 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 511 ocfs2_error(sb, 512 "Extended attribute block #%llu has an " 513 "invalid xb_blkno of %llu", 514 (unsigned long long)bh->b_blocknr, 515 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 516 return -EINVAL; 517 } 518 519 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 520 ocfs2_error(sb, 521 "Extended attribute block #%llu has an invalid " 522 "xb_fs_generation of #%u", 523 (unsigned long long)bh->b_blocknr, 524 le32_to_cpu(xb->xb_fs_generation)); 525 return -EINVAL; 526 } 527 528 return 0; 529 } 530 531 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 532 struct buffer_head **bh) 533 { 534 int rc; 535 struct buffer_head *tmp = *bh; 536 537 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 538 ocfs2_validate_xattr_block); 539 540 /* If ocfs2_read_block() got us a new bh, pass it up. */ 541 if (!rc && !*bh) 542 *bh = tmp; 543 544 return rc; 545 } 546 547 static inline const char *ocfs2_xattr_prefix(int name_index) 548 { 549 const struct xattr_handler *handler = NULL; 550 551 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 552 handler = ocfs2_xattr_handler_map[name_index]; 553 554 return handler ? 
handler->prefix : NULL; 555 } 556 557 static u32 ocfs2_xattr_name_hash(struct inode *inode, 558 const char *name, 559 int name_len) 560 { 561 /* Get hash value of uuid from super block */ 562 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 563 int i; 564 565 /* hash extended attribute name */ 566 for (i = 0; i < name_len; i++) { 567 hash = (hash << OCFS2_HASH_SHIFT) ^ 568 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 569 *name++; 570 } 571 572 return hash; 573 } 574 575 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 576 { 577 return namevalue_size(name_len, value_len) + 578 sizeof(struct ocfs2_xattr_entry); 579 } 580 581 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 582 { 583 return namevalue_size_xi(xi) + 584 sizeof(struct ocfs2_xattr_entry); 585 } 586 587 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 588 { 589 return namevalue_size_xe(xe) + 590 sizeof(struct ocfs2_xattr_entry); 591 } 592 593 int ocfs2_calc_security_init(struct inode *dir, 594 struct ocfs2_security_xattr_info *si, 595 int *want_clusters, 596 int *xattr_credits, 597 struct ocfs2_alloc_context **xattr_ac) 598 { 599 int ret = 0; 600 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 601 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 602 si->value_len); 603 604 /* 605 * The max space of security xattr taken inline is 606 * 256(name) + 80(value) + 16(entry) = 352 bytes, 607 * So reserve one metadata block for it is ok. 
608 */ 609 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 610 s_size > OCFS2_XATTR_FREE_IN_IBODY) { 611 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 612 if (ret) { 613 mlog_errno(ret); 614 return ret; 615 } 616 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 617 } 618 619 /* reserve clusters for xattr value which will be set in B tree*/ 620 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { 621 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 622 si->value_len); 623 624 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 625 new_clusters); 626 *want_clusters += new_clusters; 627 } 628 return ret; 629 } 630 631 int ocfs2_calc_xattr_init(struct inode *dir, 632 struct buffer_head *dir_bh, 633 umode_t mode, 634 struct ocfs2_security_xattr_info *si, 635 int *want_clusters, 636 int *xattr_credits, 637 int *want_meta) 638 { 639 int ret = 0; 640 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 641 int s_size = 0, a_size = 0, acl_len = 0, new_clusters; 642 643 if (si->enable) 644 s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 645 si->value_len); 646 647 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 648 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 649 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 650 "", NULL, 0); 651 if (acl_len > 0) { 652 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 653 if (S_ISDIR(mode)) 654 a_size <<= 1; 655 } else if (acl_len != 0 && acl_len != -ENODATA) { 656 mlog_errno(ret); 657 return ret; 658 } 659 } 660 661 if (!(s_size + a_size)) 662 return ret; 663 664 /* 665 * The max space of security xattr taken inline is 666 * 256(name) + 80(value) + 16(entry) = 352 bytes, 667 * The max space of acl xattr taken inline is 668 * 80(value) + 16(entry) * 2(if directory) = 192 bytes, 669 * when blocksize = 512, may reserve one more cluser for 670 * xattr bucket, otherwise reserve one metadata block 671 * for them is ok. 
672 * If this is a new directory with inline data, 673 * we choose to reserve the entire inline area for 674 * directory contents and force an external xattr block. 675 */ 676 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 677 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || 678 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 679 *want_meta = *want_meta + 1; 680 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 681 } 682 683 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && 684 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { 685 *want_clusters += 1; 686 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); 687 } 688 689 /* 690 * reserve credits and clusters for xattrs which has large value 691 * and have to be set outside 692 */ 693 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { 694 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 695 si->value_len); 696 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 697 new_clusters); 698 *want_clusters += new_clusters; 699 } 700 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && 701 acl_len > OCFS2_XATTR_INLINE_SIZE) { 702 /* for directory, it has DEFAULT and ACCESS two types of acls */ 703 new_clusters = (S_ISDIR(mode) ? 
2 : 1) * 704 ocfs2_clusters_for_bytes(dir->i_sb, acl_len); 705 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 706 new_clusters); 707 *want_clusters += new_clusters; 708 } 709 710 return ret; 711 } 712 713 static int ocfs2_xattr_extend_allocation(struct inode *inode, 714 u32 clusters_to_add, 715 struct ocfs2_xattr_value_buf *vb, 716 struct ocfs2_xattr_set_ctxt *ctxt) 717 { 718 int status = 0, credits; 719 handle_t *handle = ctxt->handle; 720 enum ocfs2_alloc_restarted why; 721 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 722 struct ocfs2_extent_tree et; 723 724 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 725 726 while (clusters_to_add) { 727 trace_ocfs2_xattr_extend_allocation(clusters_to_add); 728 729 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 730 OCFS2_JOURNAL_ACCESS_WRITE); 731 if (status < 0) { 732 mlog_errno(status); 733 break; 734 } 735 736 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 737 status = ocfs2_add_clusters_in_btree(handle, 738 &et, 739 &logical_start, 740 clusters_to_add, 741 0, 742 ctxt->data_ac, 743 ctxt->meta_ac, 744 &why); 745 if ((status < 0) && (status != -EAGAIN)) { 746 if (status != -ENOSPC) 747 mlog_errno(status); 748 break; 749 } 750 751 ocfs2_journal_dirty(handle, vb->vb_bh); 752 753 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - 754 prev_clusters; 755 756 if (why != RESTART_NONE && clusters_to_add) { 757 /* 758 * We can only fail in case the alloc file doesn't give 759 * up enough clusters. 
760 */ 761 BUG_ON(why == RESTART_META); 762 763 credits = ocfs2_calc_extend_credits(inode->i_sb, 764 &vb->vb_xv->xr_list); 765 status = ocfs2_extend_trans(handle, credits); 766 if (status < 0) { 767 status = -ENOMEM; 768 mlog_errno(status); 769 break; 770 } 771 } 772 } 773 774 return status; 775 } 776 777 static int __ocfs2_remove_xattr_range(struct inode *inode, 778 struct ocfs2_xattr_value_buf *vb, 779 u32 cpos, u32 phys_cpos, u32 len, 780 unsigned int ext_flags, 781 struct ocfs2_xattr_set_ctxt *ctxt) 782 { 783 int ret; 784 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 785 handle_t *handle = ctxt->handle; 786 struct ocfs2_extent_tree et; 787 788 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 789 790 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 791 OCFS2_JOURNAL_ACCESS_WRITE); 792 if (ret) { 793 mlog_errno(ret); 794 goto out; 795 } 796 797 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, 798 &ctxt->dealloc); 799 if (ret) { 800 mlog_errno(ret); 801 goto out; 802 } 803 804 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 805 ocfs2_journal_dirty(handle, vb->vb_bh); 806 807 if (ext_flags & OCFS2_EXT_REFCOUNTED) 808 ret = ocfs2_decrease_refcount(inode, handle, 809 ocfs2_blocks_to_clusters(inode->i_sb, 810 phys_blkno), 811 len, ctxt->meta_ac, &ctxt->dealloc, 1); 812 else 813 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, 814 phys_blkno, len); 815 if (ret) 816 mlog_errno(ret); 817 818 out: 819 return ret; 820 } 821 822 static int ocfs2_xattr_shrink_size(struct inode *inode, 823 u32 old_clusters, 824 u32 new_clusters, 825 struct ocfs2_xattr_value_buf *vb, 826 struct ocfs2_xattr_set_ctxt *ctxt) 827 { 828 int ret = 0; 829 unsigned int ext_flags; 830 u32 trunc_len, cpos, phys_cpos, alloc_size; 831 u64 block; 832 833 if (old_clusters <= new_clusters) 834 return 0; 835 836 cpos = new_clusters; 837 trunc_len = old_clusters - new_clusters; 838 while (trunc_len) { 839 ret = ocfs2_xattr_get_clusters(inode, cpos, 
&phys_cpos, 840 &alloc_size, 841 &vb->vb_xv->xr_list, &ext_flags); 842 if (ret) { 843 mlog_errno(ret); 844 goto out; 845 } 846 847 if (alloc_size > trunc_len) 848 alloc_size = trunc_len; 849 850 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 851 phys_cpos, alloc_size, 852 ext_flags, ctxt); 853 if (ret) { 854 mlog_errno(ret); 855 goto out; 856 } 857 858 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 859 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 860 block, alloc_size); 861 cpos += alloc_size; 862 trunc_len -= alloc_size; 863 } 864 865 out: 866 return ret; 867 } 868 869 static int ocfs2_xattr_value_truncate(struct inode *inode, 870 struct ocfs2_xattr_value_buf *vb, 871 int len, 872 struct ocfs2_xattr_set_ctxt *ctxt) 873 { 874 int ret; 875 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 876 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 877 878 if (new_clusters == old_clusters) 879 return 0; 880 881 if (new_clusters > old_clusters) 882 ret = ocfs2_xattr_extend_allocation(inode, 883 new_clusters - old_clusters, 884 vb, ctxt); 885 else 886 ret = ocfs2_xattr_shrink_size(inode, 887 old_clusters, new_clusters, 888 vb, ctxt); 889 890 return ret; 891 } 892 893 static int ocfs2_xattr_list_entry(char *buffer, size_t size, 894 size_t *result, const char *prefix, 895 const char *name, int name_len) 896 { 897 char *p = buffer + *result; 898 int prefix_len = strlen(prefix); 899 int total_len = prefix_len + name_len + 1; 900 901 *result += total_len; 902 903 /* we are just looking for how big our buffer needs to be */ 904 if (!size) 905 return 0; 906 907 if (*result > size) 908 return -ERANGE; 909 910 memcpy(p, prefix, prefix_len); 911 memcpy(p + prefix_len, name, name_len); 912 p[prefix_len + name_len] = '\0'; 913 914 return 0; 915 } 916 917 static int ocfs2_xattr_list_entries(struct inode *inode, 918 struct ocfs2_xattr_header *header, 919 char *buffer, size_t buffer_size) 920 { 921 size_t result = 0; 922 int i, type, ret; 923 
const char *prefix, *name; 924 925 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 926 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 927 type = ocfs2_xattr_get_type(entry); 928 prefix = ocfs2_xattr_prefix(type); 929 930 if (prefix) { 931 name = (const char *)header + 932 le16_to_cpu(entry->xe_name_offset); 933 934 ret = ocfs2_xattr_list_entry(buffer, buffer_size, 935 &result, prefix, name, 936 entry->xe_name_len); 937 if (ret) 938 return ret; 939 } 940 } 941 942 return result; 943 } 944 945 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 946 struct ocfs2_dinode *di) 947 { 948 struct ocfs2_xattr_header *xh; 949 int i; 950 951 xh = (struct ocfs2_xattr_header *) 952 ((void *)di + inode->i_sb->s_blocksize - 953 le16_to_cpu(di->i_xattr_inline_size)); 954 955 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 956 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 957 return 1; 958 959 return 0; 960 } 961 962 static int ocfs2_xattr_ibody_list(struct inode *inode, 963 struct ocfs2_dinode *di, 964 char *buffer, 965 size_t buffer_size) 966 { 967 struct ocfs2_xattr_header *header = NULL; 968 struct ocfs2_inode_info *oi = OCFS2_I(inode); 969 int ret = 0; 970 971 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 972 return ret; 973 974 header = (struct ocfs2_xattr_header *) 975 ((void *)di + inode->i_sb->s_blocksize - 976 le16_to_cpu(di->i_xattr_inline_size)); 977 978 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 979 980 return ret; 981 } 982 983 static int ocfs2_xattr_block_list(struct inode *inode, 984 struct ocfs2_dinode *di, 985 char *buffer, 986 size_t buffer_size) 987 { 988 struct buffer_head *blk_bh = NULL; 989 struct ocfs2_xattr_block *xb; 990 int ret = 0; 991 992 if (!di->i_xattr_loc) 993 return ret; 994 995 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 996 &blk_bh); 997 if (ret < 0) { 998 mlog_errno(ret); 999 return ret; 1000 } 1001 1002 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1003 if 
(!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1004 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 1005 ret = ocfs2_xattr_list_entries(inode, header, 1006 buffer, buffer_size); 1007 } else 1008 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1009 buffer, buffer_size); 1010 1011 brelse(blk_bh); 1012 1013 return ret; 1014 } 1015 1016 ssize_t ocfs2_listxattr(struct dentry *dentry, 1017 char *buffer, 1018 size_t size) 1019 { 1020 int ret = 0, i_ret = 0, b_ret = 0; 1021 struct buffer_head *di_bh = NULL; 1022 struct ocfs2_dinode *di = NULL; 1023 struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry)); 1024 1025 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) 1026 return -EOPNOTSUPP; 1027 1028 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1029 return ret; 1030 1031 ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0); 1032 if (ret < 0) { 1033 mlog_errno(ret); 1034 return ret; 1035 } 1036 1037 di = (struct ocfs2_dinode *)di_bh->b_data; 1038 1039 down_read(&oi->ip_xattr_sem); 1040 i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size); 1041 if (i_ret < 0) 1042 b_ret = 0; 1043 else { 1044 if (buffer) { 1045 buffer += i_ret; 1046 size -= i_ret; 1047 } 1048 b_ret = ocfs2_xattr_block_list(d_inode(dentry), di, 1049 buffer, size); 1050 if (b_ret < 0) 1051 i_ret = 0; 1052 } 1053 up_read(&oi->ip_xattr_sem); 1054 ocfs2_inode_unlock(d_inode(dentry), 0); 1055 1056 brelse(di_bh); 1057 1058 return i_ret + b_ret; 1059 } 1060 1061 static int ocfs2_xattr_find_entry(int name_index, 1062 const char *name, 1063 struct ocfs2_xattr_search *xs) 1064 { 1065 struct ocfs2_xattr_entry *entry; 1066 size_t name_len; 1067 int i, cmp = 1; 1068 1069 if (name == NULL) 1070 return -EINVAL; 1071 1072 name_len = strlen(name); 1073 entry = xs->here; 1074 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 1075 cmp = name_index - ocfs2_xattr_get_type(entry); 1076 if (!cmp) 1077 cmp = name_len - entry->xe_name_len; 1078 if (!cmp) 1079 cmp = memcmp(name, (xs->base 
+ 1080 le16_to_cpu(entry->xe_name_offset)), 1081 name_len); 1082 if (cmp == 0) 1083 break; 1084 entry += 1; 1085 } 1086 xs->here = entry; 1087 1088 return cmp ? -ENODATA : 0; 1089 } 1090 1091 static int ocfs2_xattr_get_value_outside(struct inode *inode, 1092 struct ocfs2_xattr_value_root *xv, 1093 void *buffer, 1094 size_t len) 1095 { 1096 u32 cpos, p_cluster, num_clusters, bpc, clusters; 1097 u64 blkno; 1098 int i, ret = 0; 1099 size_t cplen, blocksize; 1100 struct buffer_head *bh = NULL; 1101 struct ocfs2_extent_list *el; 1102 1103 el = &xv->xr_list; 1104 clusters = le32_to_cpu(xv->xr_clusters); 1105 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1106 blocksize = inode->i_sb->s_blocksize; 1107 1108 cpos = 0; 1109 while (cpos < clusters) { 1110 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1111 &num_clusters, el, NULL); 1112 if (ret) { 1113 mlog_errno(ret); 1114 goto out; 1115 } 1116 1117 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1118 /* Copy ocfs2_xattr_value */ 1119 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1120 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1121 &bh, NULL); 1122 if (ret) { 1123 mlog_errno(ret); 1124 goto out; 1125 } 1126 1127 cplen = len >= blocksize ? 
blocksize : len; 1128 memcpy(buffer, bh->b_data, cplen); 1129 len -= cplen; 1130 buffer += cplen; 1131 1132 brelse(bh); 1133 bh = NULL; 1134 if (len == 0) 1135 break; 1136 } 1137 cpos += num_clusters; 1138 } 1139 out: 1140 return ret; 1141 } 1142 1143 static int ocfs2_xattr_ibody_get(struct inode *inode, 1144 int name_index, 1145 const char *name, 1146 void *buffer, 1147 size_t buffer_size, 1148 struct ocfs2_xattr_search *xs) 1149 { 1150 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1151 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1152 struct ocfs2_xattr_value_root *xv; 1153 size_t size; 1154 int ret = 0; 1155 1156 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 1157 return -ENODATA; 1158 1159 xs->end = (void *)di + inode->i_sb->s_blocksize; 1160 xs->header = (struct ocfs2_xattr_header *) 1161 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 1162 xs->base = (void *)xs->header; 1163 xs->here = xs->header->xh_entries; 1164 1165 ret = ocfs2_xattr_find_entry(name_index, name, xs); 1166 if (ret) 1167 return ret; 1168 size = le64_to_cpu(xs->here->xe_value_size); 1169 if (buffer) { 1170 if (size > buffer_size) 1171 return -ERANGE; 1172 if (ocfs2_xattr_is_local(xs->here)) { 1173 memcpy(buffer, (void *)xs->base + 1174 le16_to_cpu(xs->here->xe_name_offset) + 1175 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); 1176 } else { 1177 xv = (struct ocfs2_xattr_value_root *) 1178 (xs->base + le16_to_cpu( 1179 xs->here->xe_name_offset) + 1180 OCFS2_XATTR_SIZE(xs->here->xe_name_len)); 1181 ret = ocfs2_xattr_get_value_outside(inode, xv, 1182 buffer, size); 1183 if (ret < 0) { 1184 mlog_errno(ret); 1185 return ret; 1186 } 1187 } 1188 } 1189 1190 return size; 1191 } 1192 1193 static int ocfs2_xattr_block_get(struct inode *inode, 1194 int name_index, 1195 const char *name, 1196 void *buffer, 1197 size_t buffer_size, 1198 struct ocfs2_xattr_search *xs) 1199 { 1200 struct ocfs2_xattr_block *xb; 1201 struct ocfs2_xattr_value_root *xv; 1202 size_t size; 1203 
int ret = -ENODATA, name_offset, name_len, i; 1204 int uninitialized_var(block_off); 1205 1206 xs->bucket = ocfs2_xattr_bucket_new(inode); 1207 if (!xs->bucket) { 1208 ret = -ENOMEM; 1209 mlog_errno(ret); 1210 goto cleanup; 1211 } 1212 1213 ret = ocfs2_xattr_block_find(inode, name_index, name, xs); 1214 if (ret) { 1215 mlog_errno(ret); 1216 goto cleanup; 1217 } 1218 1219 if (xs->not_found) { 1220 ret = -ENODATA; 1221 goto cleanup; 1222 } 1223 1224 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 1225 size = le64_to_cpu(xs->here->xe_value_size); 1226 if (buffer) { 1227 ret = -ERANGE; 1228 if (size > buffer_size) 1229 goto cleanup; 1230 1231 name_offset = le16_to_cpu(xs->here->xe_name_offset); 1232 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); 1233 i = xs->here - xs->header->xh_entries; 1234 1235 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1236 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 1237 bucket_xh(xs->bucket), 1238 i, 1239 &block_off, 1240 &name_offset); 1241 if (ret) { 1242 mlog_errno(ret); 1243 goto cleanup; 1244 } 1245 xs->base = bucket_block(xs->bucket, block_off); 1246 } 1247 if (ocfs2_xattr_is_local(xs->here)) { 1248 memcpy(buffer, (void *)xs->base + 1249 name_offset + name_len, size); 1250 } else { 1251 xv = (struct ocfs2_xattr_value_root *) 1252 (xs->base + name_offset + name_len); 1253 ret = ocfs2_xattr_get_value_outside(inode, xv, 1254 buffer, size); 1255 if (ret < 0) { 1256 mlog_errno(ret); 1257 goto cleanup; 1258 } 1259 } 1260 } 1261 ret = size; 1262 cleanup: 1263 ocfs2_xattr_bucket_free(xs->bucket); 1264 1265 brelse(xs->xattr_bh); 1266 xs->xattr_bh = NULL; 1267 return ret; 1268 } 1269 1270 int ocfs2_xattr_get_nolock(struct inode *inode, 1271 struct buffer_head *di_bh, 1272 int name_index, 1273 const char *name, 1274 void *buffer, 1275 size_t buffer_size) 1276 { 1277 int ret; 1278 struct ocfs2_dinode *di = NULL; 1279 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1280 struct ocfs2_xattr_search xis = { 1281 .not_found = 
-ENODATA, 1282 }; 1283 struct ocfs2_xattr_search xbs = { 1284 .not_found = -ENODATA, 1285 }; 1286 1287 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 1288 return -EOPNOTSUPP; 1289 1290 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1291 return -ENODATA; 1292 1293 xis.inode_bh = xbs.inode_bh = di_bh; 1294 di = (struct ocfs2_dinode *)di_bh->b_data; 1295 1296 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 1297 buffer_size, &xis); 1298 if (ret == -ENODATA && di->i_xattr_loc) 1299 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1300 buffer_size, &xbs); 1301 1302 return ret; 1303 } 1304 1305 /* ocfs2_xattr_get() 1306 * 1307 * Copy an extended attribute into the buffer provided. 1308 * Buffer is NULL to compute the size of buffer required. 1309 */ 1310 static int ocfs2_xattr_get(struct inode *inode, 1311 int name_index, 1312 const char *name, 1313 void *buffer, 1314 size_t buffer_size) 1315 { 1316 int ret; 1317 struct buffer_head *di_bh = NULL; 1318 1319 ret = ocfs2_inode_lock(inode, &di_bh, 0); 1320 if (ret < 0) { 1321 mlog_errno(ret); 1322 return ret; 1323 } 1324 down_read(&OCFS2_I(inode)->ip_xattr_sem); 1325 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, 1326 name, buffer, buffer_size); 1327 up_read(&OCFS2_I(inode)->ip_xattr_sem); 1328 1329 ocfs2_inode_unlock(inode, 0); 1330 1331 brelse(di_bh); 1332 1333 return ret; 1334 } 1335 1336 static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1337 handle_t *handle, 1338 struct ocfs2_xattr_value_buf *vb, 1339 const void *value, 1340 int value_len) 1341 { 1342 int ret = 0, i, cp_len; 1343 u16 blocksize = inode->i_sb->s_blocksize; 1344 u32 p_cluster, num_clusters; 1345 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1346 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1347 u64 blkno; 1348 struct buffer_head *bh = NULL; 1349 unsigned int ext_flags; 1350 struct ocfs2_xattr_value_root *xv = vb->vb_xv; 1351 1352 BUG_ON(clusters > 
le32_to_cpu(xv->xr_clusters)); 1353 1354 while (cpos < clusters) { 1355 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1356 &num_clusters, &xv->xr_list, 1357 &ext_flags); 1358 if (ret) { 1359 mlog_errno(ret); 1360 goto out; 1361 } 1362 1363 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 1364 1365 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1366 1367 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1368 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1369 &bh, NULL); 1370 if (ret) { 1371 mlog_errno(ret); 1372 goto out; 1373 } 1374 1375 ret = ocfs2_journal_access(handle, 1376 INODE_CACHE(inode), 1377 bh, 1378 OCFS2_JOURNAL_ACCESS_WRITE); 1379 if (ret < 0) { 1380 mlog_errno(ret); 1381 goto out; 1382 } 1383 1384 cp_len = value_len > blocksize ? blocksize : value_len; 1385 memcpy(bh->b_data, value, cp_len); 1386 value_len -= cp_len; 1387 value += cp_len; 1388 if (cp_len < blocksize) 1389 memset(bh->b_data + cp_len, 0, 1390 blocksize - cp_len); 1391 1392 ocfs2_journal_dirty(handle, bh); 1393 brelse(bh); 1394 bh = NULL; 1395 1396 /* 1397 * XXX: do we need to empty all the following 1398 * blocks in this cluster? 
1399 */ 1400 if (!value_len) 1401 break; 1402 } 1403 cpos += num_clusters; 1404 } 1405 out: 1406 brelse(bh); 1407 1408 return ret; 1409 } 1410 1411 static int ocfs2_xa_check_space_helper(int needed_space, int free_start, 1412 int num_entries) 1413 { 1414 int free_space; 1415 1416 if (!needed_space) 1417 return 0; 1418 1419 free_space = free_start - 1420 sizeof(struct ocfs2_xattr_header) - 1421 (num_entries * sizeof(struct ocfs2_xattr_entry)) - 1422 OCFS2_XATTR_HEADER_GAP; 1423 if (free_space < 0) 1424 return -EIO; 1425 if (free_space < needed_space) 1426 return -ENOSPC; 1427 1428 return 0; 1429 } 1430 1431 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, 1432 int type) 1433 { 1434 return loc->xl_ops->xlo_journal_access(handle, loc, type); 1435 } 1436 1437 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) 1438 { 1439 loc->xl_ops->xlo_journal_dirty(handle, loc); 1440 } 1441 1442 /* Give a pointer into the storage for the given offset */ 1443 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) 1444 { 1445 BUG_ON(offset >= loc->xl_size); 1446 return loc->xl_ops->xlo_offset_pointer(loc, offset); 1447 } 1448 1449 /* 1450 * Wipe the name+value pair and allow the storage to reclaim it. This 1451 * must be followed by either removal of the entry or a call to 1452 * ocfs2_xa_add_namevalue(). 1453 */ 1454 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) 1455 { 1456 loc->xl_ops->xlo_wipe_namevalue(loc); 1457 } 1458 1459 /* 1460 * Find lowest offset to a name+value pair. This is the start of our 1461 * downward-growing free space. 1462 */ 1463 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) 1464 { 1465 return loc->xl_ops->xlo_get_free_start(loc); 1466 } 1467 1468 /* Can we reuse loc->xl_entry for xi? 
*/ 1469 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, 1470 struct ocfs2_xattr_info *xi) 1471 { 1472 return loc->xl_ops->xlo_can_reuse(loc, xi); 1473 } 1474 1475 /* How much free space is needed to set the new value */ 1476 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, 1477 struct ocfs2_xattr_info *xi) 1478 { 1479 return loc->xl_ops->xlo_check_space(loc, xi); 1480 } 1481 1482 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1483 { 1484 loc->xl_ops->xlo_add_entry(loc, name_hash); 1485 loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); 1486 /* 1487 * We can't leave the new entry's xe_name_offset at zero or 1488 * add_namevalue() will go nuts. We set it to the size of our 1489 * storage so that it can never be less than any other entry. 1490 */ 1491 loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); 1492 } 1493 1494 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, 1495 struct ocfs2_xattr_info *xi) 1496 { 1497 int size = namevalue_size_xi(xi); 1498 int nameval_offset; 1499 char *nameval_buf; 1500 1501 loc->xl_ops->xlo_add_namevalue(loc, size); 1502 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1503 loc->xl_entry->xe_name_len = xi->xi_name_len; 1504 ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); 1505 ocfs2_xattr_set_local(loc->xl_entry, 1506 xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); 1507 1508 nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1509 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 1510 memset(nameval_buf, 0, size); 1511 memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); 1512 } 1513 1514 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, 1515 struct ocfs2_xattr_value_buf *vb) 1516 { 1517 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1518 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1519 1520 /* Value bufs are for value trees */ 1521 BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); 1522 
BUG_ON(namevalue_size_xe(loc->xl_entry) != 1523 (name_size + OCFS2_XATTR_ROOT_SIZE)); 1524 1525 loc->xl_ops->xlo_fill_value_buf(loc, vb); 1526 vb->vb_xv = 1527 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, 1528 nameval_offset + 1529 name_size); 1530 } 1531 1532 static int ocfs2_xa_block_journal_access(handle_t *handle, 1533 struct ocfs2_xa_loc *loc, int type) 1534 { 1535 struct buffer_head *bh = loc->xl_storage; 1536 ocfs2_journal_access_func access; 1537 1538 if (loc->xl_size == (bh->b_size - 1539 offsetof(struct ocfs2_xattr_block, 1540 xb_attrs.xb_header))) 1541 access = ocfs2_journal_access_xb; 1542 else 1543 access = ocfs2_journal_access_di; 1544 return access(handle, INODE_CACHE(loc->xl_inode), bh, type); 1545 } 1546 1547 static void ocfs2_xa_block_journal_dirty(handle_t *handle, 1548 struct ocfs2_xa_loc *loc) 1549 { 1550 struct buffer_head *bh = loc->xl_storage; 1551 1552 ocfs2_journal_dirty(handle, bh); 1553 } 1554 1555 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, 1556 int offset) 1557 { 1558 return (char *)loc->xl_header + offset; 1559 } 1560 1561 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, 1562 struct ocfs2_xattr_info *xi) 1563 { 1564 /* 1565 * Block storage is strict. If the sizes aren't exact, we will 1566 * remove the old one and reinsert the new. 
1567 */ 1568 return namevalue_size_xe(loc->xl_entry) == 1569 namevalue_size_xi(xi); 1570 } 1571 1572 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) 1573 { 1574 struct ocfs2_xattr_header *xh = loc->xl_header; 1575 int i, count = le16_to_cpu(xh->xh_count); 1576 int offset, free_start = loc->xl_size; 1577 1578 for (i = 0; i < count; i++) { 1579 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1580 if (offset < free_start) 1581 free_start = offset; 1582 } 1583 1584 return free_start; 1585 } 1586 1587 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, 1588 struct ocfs2_xattr_info *xi) 1589 { 1590 int count = le16_to_cpu(loc->xl_header->xh_count); 1591 int free_start = ocfs2_xa_get_free_start(loc); 1592 int needed_space = ocfs2_xi_entry_usage(xi); 1593 1594 /* 1595 * Block storage will reclaim the original entry before inserting 1596 * the new value, so we only need the difference. If the new 1597 * entry is smaller than the old one, we don't need anything. 1598 */ 1599 if (loc->xl_entry) { 1600 /* Don't need space if we're reusing! */ 1601 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1602 needed_space = 0; 1603 else 1604 needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); 1605 } 1606 if (needed_space < 0) 1607 needed_space = 0; 1608 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1609 } 1610 1611 /* 1612 * Block storage for xattrs keeps the name+value pairs compacted. When 1613 * we remove one, we have to shift any that preceded it towards the end. 
1614 */ 1615 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) 1616 { 1617 int i, offset; 1618 int namevalue_offset, first_namevalue_offset, namevalue_size; 1619 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1620 struct ocfs2_xattr_header *xh = loc->xl_header; 1621 int count = le16_to_cpu(xh->xh_count); 1622 1623 namevalue_offset = le16_to_cpu(entry->xe_name_offset); 1624 namevalue_size = namevalue_size_xe(entry); 1625 first_namevalue_offset = ocfs2_xa_get_free_start(loc); 1626 1627 /* Shift the name+value pairs */ 1628 memmove((char *)xh + first_namevalue_offset + namevalue_size, 1629 (char *)xh + first_namevalue_offset, 1630 namevalue_offset - first_namevalue_offset); 1631 memset((char *)xh + first_namevalue_offset, 0, namevalue_size); 1632 1633 /* Now tell xh->xh_entries about it */ 1634 for (i = 0; i < count; i++) { 1635 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1636 if (offset <= namevalue_offset) 1637 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1638 namevalue_size); 1639 } 1640 1641 /* 1642 * Note that we don't update xh_free_start or xh_name_value_len 1643 * because they're not used in block-stored xattrs. 
1644 */ 1645 } 1646 1647 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1648 { 1649 int count = le16_to_cpu(loc->xl_header->xh_count); 1650 loc->xl_entry = &(loc->xl_header->xh_entries[count]); 1651 le16_add_cpu(&loc->xl_header->xh_count, 1); 1652 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1653 } 1654 1655 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1656 { 1657 int free_start = ocfs2_xa_get_free_start(loc); 1658 1659 loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); 1660 } 1661 1662 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, 1663 struct ocfs2_xattr_value_buf *vb) 1664 { 1665 struct buffer_head *bh = loc->xl_storage; 1666 1667 if (loc->xl_size == (bh->b_size - 1668 offsetof(struct ocfs2_xattr_block, 1669 xb_attrs.xb_header))) 1670 vb->vb_access = ocfs2_journal_access_xb; 1671 else 1672 vb->vb_access = ocfs2_journal_access_di; 1673 vb->vb_bh = bh; 1674 } 1675 1676 /* 1677 * Operations for xattrs stored in blocks. This includes inline inode 1678 * storage and unindexed ocfs2_xattr_blocks. 
1679 */ 1680 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { 1681 .xlo_journal_access = ocfs2_xa_block_journal_access, 1682 .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, 1683 .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, 1684 .xlo_check_space = ocfs2_xa_block_check_space, 1685 .xlo_can_reuse = ocfs2_xa_block_can_reuse, 1686 .xlo_get_free_start = ocfs2_xa_block_get_free_start, 1687 .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, 1688 .xlo_add_entry = ocfs2_xa_block_add_entry, 1689 .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, 1690 .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, 1691 }; 1692 1693 static int ocfs2_xa_bucket_journal_access(handle_t *handle, 1694 struct ocfs2_xa_loc *loc, int type) 1695 { 1696 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1697 1698 return ocfs2_xattr_bucket_journal_access(handle, bucket, type); 1699 } 1700 1701 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, 1702 struct ocfs2_xa_loc *loc) 1703 { 1704 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1705 1706 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 1707 } 1708 1709 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, 1710 int offset) 1711 { 1712 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1713 int block, block_offset; 1714 1715 /* The header is at the front of the bucket */ 1716 block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; 1717 block_offset = offset % loc->xl_inode->i_sb->s_blocksize; 1718 1719 return bucket_block(bucket, block) + block_offset; 1720 } 1721 1722 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, 1723 struct ocfs2_xattr_info *xi) 1724 { 1725 return namevalue_size_xe(loc->xl_entry) >= 1726 namevalue_size_xi(xi); 1727 } 1728 1729 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) 1730 { 1731 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1732 return le16_to_cpu(bucket_xh(bucket)->xh_free_start); 1733 } 1734 1735 static 
int ocfs2_bucket_align_free_start(struct super_block *sb, 1736 int free_start, int size) 1737 { 1738 /* 1739 * We need to make sure that the name+value pair fits within 1740 * one block. 1741 */ 1742 if (((free_start - size) >> sb->s_blocksize_bits) != 1743 ((free_start - 1) >> sb->s_blocksize_bits)) 1744 free_start -= free_start % sb->s_blocksize; 1745 1746 return free_start; 1747 } 1748 1749 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, 1750 struct ocfs2_xattr_info *xi) 1751 { 1752 int rc; 1753 int count = le16_to_cpu(loc->xl_header->xh_count); 1754 int free_start = ocfs2_xa_get_free_start(loc); 1755 int needed_space = ocfs2_xi_entry_usage(xi); 1756 int size = namevalue_size_xi(xi); 1757 struct super_block *sb = loc->xl_inode->i_sb; 1758 1759 /* 1760 * Bucket storage does not reclaim name+value pairs it cannot 1761 * reuse. They live as holes until the bucket fills, and then 1762 * the bucket is defragmented. However, the bucket can reclaim 1763 * the ocfs2_xattr_entry. 1764 */ 1765 if (loc->xl_entry) { 1766 /* Don't need space if we're reusing! */ 1767 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1768 needed_space = 0; 1769 else 1770 needed_space -= sizeof(struct ocfs2_xattr_entry); 1771 } 1772 BUG_ON(needed_space < 0); 1773 1774 if (free_start < size) { 1775 if (needed_space) 1776 return -ENOSPC; 1777 } else { 1778 /* 1779 * First we check if it would fit in the first place. 1780 * Below, we align the free start to a block. This may 1781 * slide us below the minimum gap. By checking unaligned 1782 * first, we avoid that error. 
1783 */ 1784 rc = ocfs2_xa_check_space_helper(needed_space, free_start, 1785 count); 1786 if (rc) 1787 return rc; 1788 free_start = ocfs2_bucket_align_free_start(sb, free_start, 1789 size); 1790 } 1791 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1792 } 1793 1794 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) 1795 { 1796 le16_add_cpu(&loc->xl_header->xh_name_value_len, 1797 -namevalue_size_xe(loc->xl_entry)); 1798 } 1799 1800 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1801 { 1802 struct ocfs2_xattr_header *xh = loc->xl_header; 1803 int count = le16_to_cpu(xh->xh_count); 1804 int low = 0, high = count - 1, tmp; 1805 struct ocfs2_xattr_entry *tmp_xe; 1806 1807 /* 1808 * We keep buckets sorted by name_hash, so we need to find 1809 * our insert place. 1810 */ 1811 while (low <= high && count) { 1812 tmp = (low + high) / 2; 1813 tmp_xe = &xh->xh_entries[tmp]; 1814 1815 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 1816 low = tmp + 1; 1817 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) 1818 high = tmp - 1; 1819 else { 1820 low = tmp; 1821 break; 1822 } 1823 } 1824 1825 if (low != count) 1826 memmove(&xh->xh_entries[low + 1], 1827 &xh->xh_entries[low], 1828 ((count - low) * sizeof(struct ocfs2_xattr_entry))); 1829 1830 le16_add_cpu(&xh->xh_count, 1); 1831 loc->xl_entry = &xh->xh_entries[low]; 1832 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1833 } 1834 1835 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1836 { 1837 int free_start = ocfs2_xa_get_free_start(loc); 1838 struct ocfs2_xattr_header *xh = loc->xl_header; 1839 struct super_block *sb = loc->xl_inode->i_sb; 1840 int nameval_offset; 1841 1842 free_start = ocfs2_bucket_align_free_start(sb, free_start, size); 1843 nameval_offset = free_start - size; 1844 loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); 1845 xh->xh_free_start = cpu_to_le16(nameval_offset); 1846 
le16_add_cpu(&xh->xh_name_value_len, size); 1847 1848 } 1849 1850 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, 1851 struct ocfs2_xattr_value_buf *vb) 1852 { 1853 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1854 struct super_block *sb = loc->xl_inode->i_sb; 1855 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1856 int size = namevalue_size_xe(loc->xl_entry); 1857 int block_offset = nameval_offset >> sb->s_blocksize_bits; 1858 1859 /* Values are not allowed to straddle block boundaries */ 1860 BUG_ON(block_offset != 1861 ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); 1862 /* We expect the bucket to be filled in */ 1863 BUG_ON(!bucket->bu_bhs[block_offset]); 1864 1865 vb->vb_access = ocfs2_journal_access; 1866 vb->vb_bh = bucket->bu_bhs[block_offset]; 1867 } 1868 1869 /* Operations for xattrs stored in buckets. */ 1870 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { 1871 .xlo_journal_access = ocfs2_xa_bucket_journal_access, 1872 .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, 1873 .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, 1874 .xlo_check_space = ocfs2_xa_bucket_check_space, 1875 .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, 1876 .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, 1877 .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, 1878 .xlo_add_entry = ocfs2_xa_bucket_add_entry, 1879 .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, 1880 .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, 1881 }; 1882 1883 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) 1884 { 1885 struct ocfs2_xattr_value_buf vb; 1886 1887 if (ocfs2_xattr_is_local(loc->xl_entry)) 1888 return 0; 1889 1890 ocfs2_xa_fill_value_buf(loc, &vb); 1891 return le32_to_cpu(vb.vb_xv->xr_clusters); 1892 } 1893 1894 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, 1895 struct ocfs2_xattr_set_ctxt *ctxt) 1896 { 1897 int trunc_rc, access_rc; 1898 struct 
ocfs2_xattr_value_buf vb; 1899 1900 ocfs2_xa_fill_value_buf(loc, &vb); 1901 trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1902 ctxt); 1903 1904 /* 1905 * The caller of ocfs2_xa_value_truncate() has already called 1906 * ocfs2_xa_journal_access on the loc. However, The truncate code 1907 * calls ocfs2_extend_trans(). This may commit the previous 1908 * transaction and open a new one. If this is a bucket, truncate 1909 * could leave only vb->vb_bh set up for journaling. Meanwhile, 1910 * the caller is expecting to dirty the entire bucket. So we must 1911 * reset the journal work. We do this even if truncate has failed, 1912 * as it could have failed after committing the extend. 1913 */ 1914 access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1915 OCFS2_JOURNAL_ACCESS_WRITE); 1916 1917 /* Errors in truncate take precedence */ 1918 return trunc_rc ? trunc_rc : access_rc; 1919 } 1920 1921 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1922 { 1923 int index, count; 1924 struct ocfs2_xattr_header *xh = loc->xl_header; 1925 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1926 1927 ocfs2_xa_wipe_namevalue(loc); 1928 loc->xl_entry = NULL; 1929 1930 le16_add_cpu(&xh->xh_count, -1); 1931 count = le16_to_cpu(xh->xh_count); 1932 1933 /* 1934 * Only zero out the entry if there are more remaining. This is 1935 * important for an empty bucket, as it keeps track of the 1936 * bucket's hash value. It doesn't hurt empty block storage. 
1937 */ 1938 if (count) { 1939 index = ((char *)entry - (char *)&xh->xh_entries) / 1940 sizeof(struct ocfs2_xattr_entry); 1941 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1942 (count - index) * sizeof(struct ocfs2_xattr_entry)); 1943 memset(&xh->xh_entries[count], 0, 1944 sizeof(struct ocfs2_xattr_entry)); 1945 } 1946 } 1947 1948 /* 1949 * If we have a problem adjusting the size of an external value during 1950 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 1951 * in an intermediate state. For example, the value may be partially 1952 * truncated. 1953 * 1954 * If the value tree hasn't changed, the extend/truncate went nowhere. 1955 * We have nothing to do. The caller can treat it as a straight error. 1956 * 1957 * If the value tree got partially truncated, we now have a corrupted 1958 * extended attribute. We're going to wipe its entry and leak the 1959 * clusters. Better to leak some storage than leave a corrupt entry. 1960 * 1961 * If the value tree grew, it obviously didn't grow enough for the 1962 * new entry. We're not going to try and reclaim those clusters either. 1963 * If there was already an external value there (orig_clusters != 0), 1964 * the new clusters are attached safely and we can just leave the old 1965 * value in place. If there was no external value there, we remove 1966 * the entry. 1967 * 1968 * This way, the xattr block we store in the journal will be consistent. 1969 * If the size change broke because of the journal, no changes will hit 1970 * disk anyway. 1971 */ 1972 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 1973 const char *what, 1974 unsigned int orig_clusters) 1975 { 1976 unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 1977 char *nameval_buf = ocfs2_xa_offset_pointer(loc, 1978 le16_to_cpu(loc->xl_entry->xe_name_offset)); 1979 1980 if (new_clusters < orig_clusters) { 1981 mlog(ML_ERROR, 1982 "Partial truncate while %s xattr %.*s. 
Leaking " 1983 "%u clusters and removing the entry\n", 1984 what, loc->xl_entry->xe_name_len, nameval_buf, 1985 orig_clusters - new_clusters); 1986 ocfs2_xa_remove_entry(loc); 1987 } else if (!orig_clusters) { 1988 mlog(ML_ERROR, 1989 "Unable to allocate an external value for xattr " 1990 "%.*s safely. Leaking %u clusters and removing the " 1991 "entry\n", 1992 loc->xl_entry->xe_name_len, nameval_buf, 1993 new_clusters - orig_clusters); 1994 ocfs2_xa_remove_entry(loc); 1995 } else if (new_clusters > orig_clusters) 1996 mlog(ML_ERROR, 1997 "Unable to grow xattr %.*s safely. %u new clusters " 1998 "have been added, but the value will not be " 1999 "modified\n", 2000 loc->xl_entry->xe_name_len, nameval_buf, 2001 new_clusters - orig_clusters); 2002 } 2003 2004 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 2005 struct ocfs2_xattr_set_ctxt *ctxt) 2006 { 2007 int rc = 0; 2008 unsigned int orig_clusters; 2009 2010 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2011 orig_clusters = ocfs2_xa_value_clusters(loc); 2012 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2013 if (rc) { 2014 mlog_errno(rc); 2015 /* 2016 * Since this is remove, we can return 0 if 2017 * ocfs2_xa_cleanup_value_truncate() is going to 2018 * wipe the entry anyway. So we check the 2019 * cluster count as well. 2020 */ 2021 if (orig_clusters != ocfs2_xa_value_clusters(loc)) 2022 rc = 0; 2023 ocfs2_xa_cleanup_value_truncate(loc, "removing", 2024 orig_clusters); 2025 if (rc) 2026 goto out; 2027 } 2028 } 2029 2030 ocfs2_xa_remove_entry(loc); 2031 2032 out: 2033 return rc; 2034 } 2035 2036 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 2037 { 2038 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 2039 char *nameval_buf; 2040 2041 nameval_buf = ocfs2_xa_offset_pointer(loc, 2042 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2043 memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 2044 } 2045 2046 /* 2047 * Take an existing entry and make it ready for the new value. 
This 2048 * won't allocate space, but it may free space. It should be ready for 2049 * ocfs2_xa_prepare_entry() to finish the work. 2050 */ 2051 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc, 2052 struct ocfs2_xattr_info *xi, 2053 struct ocfs2_xattr_set_ctxt *ctxt) 2054 { 2055 int rc = 0; 2056 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2057 unsigned int orig_clusters; 2058 char *nameval_buf; 2059 int xe_local = ocfs2_xattr_is_local(loc->xl_entry); 2060 int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE; 2061 2062 BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) != 2063 name_size); 2064 2065 nameval_buf = ocfs2_xa_offset_pointer(loc, 2066 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2067 if (xe_local) { 2068 memset(nameval_buf + name_size, 0, 2069 namevalue_size_xe(loc->xl_entry) - name_size); 2070 if (!xi_local) 2071 ocfs2_xa_install_value_root(loc); 2072 } else { 2073 orig_clusters = ocfs2_xa_value_clusters(loc); 2074 if (xi_local) { 2075 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2076 if (rc < 0) 2077 mlog_errno(rc); 2078 else 2079 memset(nameval_buf + name_size, 0, 2080 namevalue_size_xe(loc->xl_entry) - 2081 name_size); 2082 } else if (le64_to_cpu(loc->xl_entry->xe_value_size) > 2083 xi->xi_value_len) { 2084 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, 2085 ctxt); 2086 if (rc < 0) 2087 mlog_errno(rc); 2088 } 2089 2090 if (rc) { 2091 ocfs2_xa_cleanup_value_truncate(loc, "reusing", 2092 orig_clusters); 2093 goto out; 2094 } 2095 } 2096 2097 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 2098 ocfs2_xattr_set_local(loc->xl_entry, xi_local); 2099 2100 out: 2101 return rc; 2102 } 2103 2104 /* 2105 * Prepares loc->xl_entry to receive the new xattr. This includes 2106 * properly setting up the name+value pair region. If loc->xl_entry 2107 * already exists, it will take care of modifying it appropriately. 2108 * 2109 * Note that this modifies the data. You did journal_access already, 2110 * right? 
2111 */ 2112 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, 2113 struct ocfs2_xattr_info *xi, 2114 u32 name_hash, 2115 struct ocfs2_xattr_set_ctxt *ctxt) 2116 { 2117 int rc = 0; 2118 unsigned int orig_clusters; 2119 __le64 orig_value_size = 0; 2120 2121 rc = ocfs2_xa_check_space(loc, xi); 2122 if (rc) 2123 goto out; 2124 2125 if (loc->xl_entry) { 2126 if (ocfs2_xa_can_reuse_entry(loc, xi)) { 2127 orig_value_size = loc->xl_entry->xe_value_size; 2128 rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); 2129 if (rc) 2130 goto out; 2131 goto alloc_value; 2132 } 2133 2134 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2135 orig_clusters = ocfs2_xa_value_clusters(loc); 2136 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2137 if (rc) { 2138 mlog_errno(rc); 2139 ocfs2_xa_cleanup_value_truncate(loc, 2140 "overwriting", 2141 orig_clusters); 2142 goto out; 2143 } 2144 } 2145 ocfs2_xa_wipe_namevalue(loc); 2146 } else 2147 ocfs2_xa_add_entry(loc, name_hash); 2148 2149 /* 2150 * If we get here, we have a blank entry. Fill it. We grow our 2151 * name+value pair back from the end. 2152 */ 2153 ocfs2_xa_add_namevalue(loc, xi); 2154 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 2155 ocfs2_xa_install_value_root(loc); 2156 2157 alloc_value: 2158 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2159 orig_clusters = ocfs2_xa_value_clusters(loc); 2160 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); 2161 if (rc < 0) { 2162 ctxt->set_abort = 1; 2163 ocfs2_xa_cleanup_value_truncate(loc, "growing", 2164 orig_clusters); 2165 /* 2166 * If we were growing an existing value, 2167 * ocfs2_xa_cleanup_value_truncate() won't remove 2168 * the entry. We need to restore the original value 2169 * size. 2170 */ 2171 if (loc->xl_entry) { 2172 BUG_ON(!orig_value_size); 2173 loc->xl_entry->xe_value_size = orig_value_size; 2174 } 2175 mlog_errno(rc); 2176 } 2177 } 2178 2179 out: 2180 return rc; 2181 } 2182 2183 /* 2184 * Store the value portion of the name+value pair. 
This will skip 2185 * values that are stored externally. Their tree roots were set up 2186 * by ocfs2_xa_prepare_entry(). 2187 */ 2188 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, 2189 struct ocfs2_xattr_info *xi, 2190 struct ocfs2_xattr_set_ctxt *ctxt) 2191 { 2192 int rc = 0; 2193 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 2194 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2195 char *nameval_buf; 2196 struct ocfs2_xattr_value_buf vb; 2197 2198 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 2199 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2200 ocfs2_xa_fill_value_buf(loc, &vb); 2201 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, 2202 ctxt->handle, &vb, 2203 xi->xi_value, 2204 xi->xi_value_len); 2205 } else 2206 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); 2207 2208 return rc; 2209 } 2210 2211 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, 2212 struct ocfs2_xattr_info *xi, 2213 struct ocfs2_xattr_set_ctxt *ctxt) 2214 { 2215 int ret; 2216 u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, 2217 xi->xi_name_len); 2218 2219 ret = ocfs2_xa_journal_access(ctxt->handle, loc, 2220 OCFS2_JOURNAL_ACCESS_WRITE); 2221 if (ret) { 2222 mlog_errno(ret); 2223 goto out; 2224 } 2225 2226 /* 2227 * From here on out, everything is going to modify the buffer a 2228 * little. Errors are going to leave the xattr header in a 2229 * sane state. Thus, even with errors we dirty the sucker. 
2230 */ 2231 2232 /* Don't worry, we are never called with !xi_value and !xl_entry */ 2233 if (!xi->xi_value) { 2234 ret = ocfs2_xa_remove(loc, ctxt); 2235 goto out_dirty; 2236 } 2237 2238 ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); 2239 if (ret) { 2240 if (ret != -ENOSPC) 2241 mlog_errno(ret); 2242 goto out_dirty; 2243 } 2244 2245 ret = ocfs2_xa_store_value(loc, xi, ctxt); 2246 if (ret) 2247 mlog_errno(ret); 2248 2249 out_dirty: 2250 ocfs2_xa_journal_dirty(ctxt->handle, loc); 2251 2252 out: 2253 return ret; 2254 } 2255 2256 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, 2257 struct inode *inode, 2258 struct buffer_head *bh, 2259 struct ocfs2_xattr_entry *entry) 2260 { 2261 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2262 2263 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); 2264 2265 loc->xl_inode = inode; 2266 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2267 loc->xl_storage = bh; 2268 loc->xl_entry = entry; 2269 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); 2270 loc->xl_header = 2271 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - 2272 loc->xl_size); 2273 } 2274 2275 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, 2276 struct inode *inode, 2277 struct buffer_head *bh, 2278 struct ocfs2_xattr_entry *entry) 2279 { 2280 struct ocfs2_xattr_block *xb = 2281 (struct ocfs2_xattr_block *)bh->b_data; 2282 2283 BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); 2284 2285 loc->xl_inode = inode; 2286 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2287 loc->xl_storage = bh; 2288 loc->xl_header = &(xb->xb_attrs.xb_header); 2289 loc->xl_entry = entry; 2290 loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, 2291 xb_attrs.xb_header); 2292 } 2293 2294 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, 2295 struct ocfs2_xattr_bucket *bucket, 2296 struct ocfs2_xattr_entry *entry) 2297 { 2298 loc->xl_inode = bucket->bu_inode; 2299 loc->xl_ops = 
&ocfs2_xa_bucket_loc_ops; 2300 loc->xl_storage = bucket; 2301 loc->xl_header = bucket_xh(bucket); 2302 loc->xl_entry = entry; 2303 loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; 2304 } 2305 2306 /* 2307 * In xattr remove, if it is stored outside and refcounted, we may have 2308 * the chance to split the refcount tree. So need the allocators. 2309 */ 2310 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, 2311 struct ocfs2_xattr_value_root *xv, 2312 struct ocfs2_caching_info *ref_ci, 2313 struct buffer_head *ref_root_bh, 2314 struct ocfs2_alloc_context **meta_ac, 2315 int *ref_credits) 2316 { 2317 int ret, meta_add = 0; 2318 u32 p_cluster, num_clusters; 2319 unsigned int ext_flags; 2320 2321 *ref_credits = 0; 2322 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 2323 &num_clusters, 2324 &xv->xr_list, 2325 &ext_flags); 2326 if (ret) { 2327 mlog_errno(ret); 2328 goto out; 2329 } 2330 2331 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 2332 goto out; 2333 2334 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, 2335 ref_root_bh, xv, 2336 &meta_add, ref_credits); 2337 if (ret) { 2338 mlog_errno(ret); 2339 goto out; 2340 } 2341 2342 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2343 meta_add, meta_ac); 2344 if (ret) 2345 mlog_errno(ret); 2346 2347 out: 2348 return ret; 2349 } 2350 2351 static int ocfs2_remove_value_outside(struct inode*inode, 2352 struct ocfs2_xattr_value_buf *vb, 2353 struct ocfs2_xattr_header *header, 2354 struct ocfs2_caching_info *ref_ci, 2355 struct buffer_head *ref_root_bh) 2356 { 2357 int ret = 0, i, ref_credits; 2358 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2359 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2360 void *val; 2361 2362 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 2363 2364 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 2365 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 2366 2367 if (ocfs2_xattr_is_local(entry)) 2368 continue; 2369 2370 val = (void *)header + 2371 
le16_to_cpu(entry->xe_name_offset); 2372 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2373 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2374 2375 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2376 ref_ci, ref_root_bh, 2377 &ctxt.meta_ac, 2378 &ref_credits); 2379 2380 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2381 ocfs2_remove_extent_credits(osb->sb)); 2382 if (IS_ERR(ctxt.handle)) { 2383 ret = PTR_ERR(ctxt.handle); 2384 mlog_errno(ret); 2385 break; 2386 } 2387 2388 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2389 2390 ocfs2_commit_trans(osb, ctxt.handle); 2391 if (ctxt.meta_ac) { 2392 ocfs2_free_alloc_context(ctxt.meta_ac); 2393 ctxt.meta_ac = NULL; 2394 } 2395 2396 if (ret < 0) { 2397 mlog_errno(ret); 2398 break; 2399 } 2400 2401 } 2402 2403 if (ctxt.meta_ac) 2404 ocfs2_free_alloc_context(ctxt.meta_ac); 2405 ocfs2_schedule_truncate_log_flush(osb, 1); 2406 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2407 return ret; 2408 } 2409 2410 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2411 struct buffer_head *di_bh, 2412 struct ocfs2_caching_info *ref_ci, 2413 struct buffer_head *ref_root_bh) 2414 { 2415 2416 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2417 struct ocfs2_xattr_header *header; 2418 int ret; 2419 struct ocfs2_xattr_value_buf vb = { 2420 .vb_bh = di_bh, 2421 .vb_access = ocfs2_journal_access_di, 2422 }; 2423 2424 header = (struct ocfs2_xattr_header *) 2425 ((void *)di + inode->i_sb->s_blocksize - 2426 le16_to_cpu(di->i_xattr_inline_size)); 2427 2428 ret = ocfs2_remove_value_outside(inode, &vb, header, 2429 ref_ci, ref_root_bh); 2430 2431 return ret; 2432 } 2433 2434 struct ocfs2_rm_xattr_bucket_para { 2435 struct ocfs2_caching_info *ref_ci; 2436 struct buffer_head *ref_root_bh; 2437 }; 2438 2439 static int ocfs2_xattr_block_remove(struct inode *inode, 2440 struct buffer_head *blk_bh, 2441 struct ocfs2_caching_info *ref_ci, 2442 struct buffer_head *ref_root_bh) 2443 { 2444 struct ocfs2_xattr_block 
*xb; 2445 int ret = 0; 2446 struct ocfs2_xattr_value_buf vb = { 2447 .vb_bh = blk_bh, 2448 .vb_access = ocfs2_journal_access_xb, 2449 }; 2450 struct ocfs2_rm_xattr_bucket_para args = { 2451 .ref_ci = ref_ci, 2452 .ref_root_bh = ref_root_bh, 2453 }; 2454 2455 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2456 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2457 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 2458 ret = ocfs2_remove_value_outside(inode, &vb, header, 2459 ref_ci, ref_root_bh); 2460 } else 2461 ret = ocfs2_iterate_xattr_index_block(inode, 2462 blk_bh, 2463 ocfs2_rm_xattr_cluster, 2464 &args); 2465 2466 return ret; 2467 } 2468 2469 static int ocfs2_xattr_free_block(struct inode *inode, 2470 u64 block, 2471 struct ocfs2_caching_info *ref_ci, 2472 struct buffer_head *ref_root_bh) 2473 { 2474 struct inode *xb_alloc_inode; 2475 struct buffer_head *xb_alloc_bh = NULL; 2476 struct buffer_head *blk_bh = NULL; 2477 struct ocfs2_xattr_block *xb; 2478 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2479 handle_t *handle; 2480 int ret = 0; 2481 u64 blk, bg_blkno; 2482 u16 bit; 2483 2484 ret = ocfs2_read_xattr_block(inode, block, &blk_bh); 2485 if (ret < 0) { 2486 mlog_errno(ret); 2487 goto out; 2488 } 2489 2490 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); 2491 if (ret < 0) { 2492 mlog_errno(ret); 2493 goto out; 2494 } 2495 2496 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2497 blk = le64_to_cpu(xb->xb_blkno); 2498 bit = le16_to_cpu(xb->xb_suballoc_bit); 2499 if (xb->xb_suballoc_loc) 2500 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); 2501 else 2502 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2503 2504 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2505 EXTENT_ALLOC_SYSTEM_INODE, 2506 le16_to_cpu(xb->xb_suballoc_slot)); 2507 if (!xb_alloc_inode) { 2508 ret = -ENOMEM; 2509 mlog_errno(ret); 2510 goto out; 2511 } 2512 mutex_lock(&xb_alloc_inode->i_mutex); 2513 2514 ret = ocfs2_inode_lock(xb_alloc_inode, 
&xb_alloc_bh, 1); 2515 if (ret < 0) { 2516 mlog_errno(ret); 2517 goto out_mutex; 2518 } 2519 2520 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 2521 if (IS_ERR(handle)) { 2522 ret = PTR_ERR(handle); 2523 mlog_errno(ret); 2524 goto out_unlock; 2525 } 2526 2527 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, 2528 bit, bg_blkno, 1); 2529 if (ret < 0) 2530 mlog_errno(ret); 2531 2532 ocfs2_commit_trans(osb, handle); 2533 out_unlock: 2534 ocfs2_inode_unlock(xb_alloc_inode, 1); 2535 brelse(xb_alloc_bh); 2536 out_mutex: 2537 mutex_unlock(&xb_alloc_inode->i_mutex); 2538 iput(xb_alloc_inode); 2539 out: 2540 brelse(blk_bh); 2541 return ret; 2542 } 2543 2544 /* 2545 * ocfs2_xattr_remove() 2546 * 2547 * Free extended attribute resources associated with this inode. 2548 */ 2549 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) 2550 { 2551 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2552 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2553 struct ocfs2_refcount_tree *ref_tree = NULL; 2554 struct buffer_head *ref_root_bh = NULL; 2555 struct ocfs2_caching_info *ref_ci = NULL; 2556 handle_t *handle; 2557 int ret; 2558 2559 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2560 return 0; 2561 2562 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2563 return 0; 2564 2565 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) { 2566 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), 2567 le64_to_cpu(di->i_refcount_loc), 2568 1, &ref_tree, &ref_root_bh); 2569 if (ret) { 2570 mlog_errno(ret); 2571 goto out; 2572 } 2573 ref_ci = &ref_tree->rf_ci; 2574 2575 } 2576 2577 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2578 ret = ocfs2_xattr_ibody_remove(inode, di_bh, 2579 ref_ci, ref_root_bh); 2580 if (ret < 0) { 2581 mlog_errno(ret); 2582 goto out; 2583 } 2584 } 2585 2586 if (di->i_xattr_loc) { 2587 ret = ocfs2_xattr_free_block(inode, 2588 le64_to_cpu(di->i_xattr_loc), 2589 ref_ci, ref_root_bh); 2590 if (ret < 0) 
{ 2591 mlog_errno(ret); 2592 goto out; 2593 } 2594 } 2595 2596 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 2597 OCFS2_INODE_UPDATE_CREDITS); 2598 if (IS_ERR(handle)) { 2599 ret = PTR_ERR(handle); 2600 mlog_errno(ret); 2601 goto out; 2602 } 2603 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 2604 OCFS2_JOURNAL_ACCESS_WRITE); 2605 if (ret) { 2606 mlog_errno(ret); 2607 goto out_commit; 2608 } 2609 2610 di->i_xattr_loc = 0; 2611 2612 spin_lock(&oi->ip_lock); 2613 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2614 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2615 spin_unlock(&oi->ip_lock); 2616 ocfs2_update_inode_fsync_trans(handle, inode, 0); 2617 2618 ocfs2_journal_dirty(handle, di_bh); 2619 out_commit: 2620 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2621 out: 2622 if (ref_tree) 2623 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); 2624 brelse(ref_root_bh); 2625 return ret; 2626 } 2627 2628 static int ocfs2_xattr_has_space_inline(struct inode *inode, 2629 struct ocfs2_dinode *di) 2630 { 2631 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2632 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; 2633 int free; 2634 2635 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) 2636 return 0; 2637 2638 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2639 struct ocfs2_inline_data *idata = &di->id2.i_data; 2640 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); 2641 } else if (ocfs2_inode_is_fast_symlink(inode)) { 2642 free = ocfs2_fast_symlink_chars(inode->i_sb) - 2643 le64_to_cpu(di->i_size); 2644 } else { 2645 struct ocfs2_extent_list *el = &di->id2.i_list; 2646 free = (le16_to_cpu(el->l_count) - 2647 le16_to_cpu(el->l_next_free_rec)) * 2648 sizeof(struct ocfs2_extent_rec); 2649 } 2650 if (free >= xattrsize) 2651 return 1; 2652 2653 return 0; 2654 } 2655 2656 /* 2657 * ocfs2_xattr_ibody_find() 2658 * 2659 * Find extended attribute in inode block and 2660 * fill search 
info into struct ocfs2_xattr_search. 2661 */ 2662 static int ocfs2_xattr_ibody_find(struct inode *inode, 2663 int name_index, 2664 const char *name, 2665 struct ocfs2_xattr_search *xs) 2666 { 2667 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2668 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2669 int ret; 2670 int has_space = 0; 2671 2672 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2673 return 0; 2674 2675 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2676 down_read(&oi->ip_alloc_sem); 2677 has_space = ocfs2_xattr_has_space_inline(inode, di); 2678 up_read(&oi->ip_alloc_sem); 2679 if (!has_space) 2680 return 0; 2681 } 2682 2683 xs->xattr_bh = xs->inode_bh; 2684 xs->end = (void *)di + inode->i_sb->s_blocksize; 2685 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) 2686 xs->header = (struct ocfs2_xattr_header *) 2687 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 2688 else 2689 xs->header = (struct ocfs2_xattr_header *) 2690 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); 2691 xs->base = (void *)xs->header; 2692 xs->here = xs->header->xh_entries; 2693 2694 /* Find the named attribute. 
*/ 2695 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2696 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2697 if (ret && ret != -ENODATA) 2698 return ret; 2699 xs->not_found = ret; 2700 } 2701 2702 return 0; 2703 } 2704 2705 static int ocfs2_xattr_ibody_init(struct inode *inode, 2706 struct buffer_head *di_bh, 2707 struct ocfs2_xattr_set_ctxt *ctxt) 2708 { 2709 int ret; 2710 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2711 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2712 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2713 unsigned int xattrsize = osb->s_xattr_inline_size; 2714 2715 if (!ocfs2_xattr_has_space_inline(inode, di)) { 2716 ret = -ENOSPC; 2717 goto out; 2718 } 2719 2720 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, 2721 OCFS2_JOURNAL_ACCESS_WRITE); 2722 if (ret) { 2723 mlog_errno(ret); 2724 goto out; 2725 } 2726 2727 /* 2728 * Adjust extent record count or inline data size 2729 * to reserve space for extended attribute. 2730 */ 2731 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2732 struct ocfs2_inline_data *idata = &di->id2.i_data; 2733 le16_add_cpu(&idata->id_count, -xattrsize); 2734 } else if (!(ocfs2_inode_is_fast_symlink(inode))) { 2735 struct ocfs2_extent_list *el = &di->id2.i_list; 2736 le16_add_cpu(&el->l_count, -(xattrsize / 2737 sizeof(struct ocfs2_extent_rec))); 2738 } 2739 di->i_xattr_inline_size = cpu_to_le16(xattrsize); 2740 2741 spin_lock(&oi->ip_lock); 2742 oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; 2743 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2744 spin_unlock(&oi->ip_lock); 2745 2746 ocfs2_journal_dirty(ctxt->handle, di_bh); 2747 2748 out: 2749 return ret; 2750 } 2751 2752 /* 2753 * ocfs2_xattr_ibody_set() 2754 * 2755 * Set, replace or remove an extended attribute into inode block. 
2756 * 2757 */ 2758 static int ocfs2_xattr_ibody_set(struct inode *inode, 2759 struct ocfs2_xattr_info *xi, 2760 struct ocfs2_xattr_search *xs, 2761 struct ocfs2_xattr_set_ctxt *ctxt) 2762 { 2763 int ret; 2764 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2765 struct ocfs2_xa_loc loc; 2766 2767 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2768 return -ENOSPC; 2769 2770 down_write(&oi->ip_alloc_sem); 2771 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2772 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2773 if (ret) { 2774 if (ret != -ENOSPC) 2775 mlog_errno(ret); 2776 goto out; 2777 } 2778 } 2779 2780 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2781 xs->not_found ? NULL : xs->here); 2782 ret = ocfs2_xa_set(&loc, xi, ctxt); 2783 if (ret) { 2784 if (ret != -ENOSPC) 2785 mlog_errno(ret); 2786 goto out; 2787 } 2788 xs->here = loc.xl_entry; 2789 2790 out: 2791 up_write(&oi->ip_alloc_sem); 2792 2793 return ret; 2794 } 2795 2796 /* 2797 * ocfs2_xattr_block_find() 2798 * 2799 * Find extended attribute in external block and 2800 * fill search info into struct ocfs2_xattr_search. 
2801 */ 2802 static int ocfs2_xattr_block_find(struct inode *inode, 2803 int name_index, 2804 const char *name, 2805 struct ocfs2_xattr_search *xs) 2806 { 2807 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2808 struct buffer_head *blk_bh = NULL; 2809 struct ocfs2_xattr_block *xb; 2810 int ret = 0; 2811 2812 if (!di->i_xattr_loc) 2813 return ret; 2814 2815 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 2816 &blk_bh); 2817 if (ret < 0) { 2818 mlog_errno(ret); 2819 return ret; 2820 } 2821 2822 xs->xattr_bh = blk_bh; 2823 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2824 2825 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2826 xs->header = &xb->xb_attrs.xb_header; 2827 xs->base = (void *)xs->header; 2828 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; 2829 xs->here = xs->header->xh_entries; 2830 2831 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2832 } else 2833 ret = ocfs2_xattr_index_block_find(inode, blk_bh, 2834 name_index, 2835 name, xs); 2836 2837 if (ret && ret != -ENODATA) { 2838 xs->xattr_bh = NULL; 2839 goto cleanup; 2840 } 2841 xs->not_found = ret; 2842 return 0; 2843 cleanup: 2844 brelse(blk_bh); 2845 2846 return ret; 2847 } 2848 2849 static int ocfs2_create_xattr_block(struct inode *inode, 2850 struct buffer_head *inode_bh, 2851 struct ocfs2_xattr_set_ctxt *ctxt, 2852 int indexed, 2853 struct buffer_head **ret_bh) 2854 { 2855 int ret; 2856 u16 suballoc_bit_start; 2857 u32 num_got; 2858 u64 suballoc_loc, first_blkno; 2859 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; 2860 struct buffer_head *new_bh = NULL; 2861 struct ocfs2_xattr_block *xblk; 2862 2863 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 2864 inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); 2865 if (ret < 0) { 2866 mlog_errno(ret); 2867 goto end; 2868 } 2869 2870 ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1, 2871 &suballoc_loc, &suballoc_bit_start, 2872 &num_got, &first_blkno); 2873 if 
(ret < 0) { 2874 mlog_errno(ret); 2875 goto end; 2876 } 2877 2878 new_bh = sb_getblk(inode->i_sb, first_blkno); 2879 if (!new_bh) { 2880 ret = -ENOMEM; 2881 mlog_errno(ret); 2882 goto end; 2883 } 2884 2885 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); 2886 2887 ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), 2888 new_bh, 2889 OCFS2_JOURNAL_ACCESS_CREATE); 2890 if (ret < 0) { 2891 mlog_errno(ret); 2892 goto end; 2893 } 2894 2895 /* Initialize ocfs2_xattr_block */ 2896 xblk = (struct ocfs2_xattr_block *)new_bh->b_data; 2897 memset(xblk, 0, inode->i_sb->s_blocksize); 2898 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2899 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2900 xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc); 2901 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2902 xblk->xb_fs_generation = 2903 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation); 2904 xblk->xb_blkno = cpu_to_le64(first_blkno); 2905 if (indexed) { 2906 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2907 xr->xt_clusters = cpu_to_le32(1); 2908 xr->xt_last_eb_blk = 0; 2909 xr->xt_list.l_tree_depth = 0; 2910 xr->xt_list.l_count = cpu_to_le16( 2911 ocfs2_xattr_recs_per_xb(inode->i_sb)); 2912 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 2913 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); 2914 } 2915 ocfs2_journal_dirty(ctxt->handle, new_bh); 2916 2917 /* Add it to the inode */ 2918 di->i_xattr_loc = cpu_to_le64(first_blkno); 2919 2920 spin_lock(&OCFS2_I(inode)->ip_lock); 2921 OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 2922 di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); 2923 spin_unlock(&OCFS2_I(inode)->ip_lock); 2924 2925 ocfs2_journal_dirty(ctxt->handle, inode_bh); 2926 2927 *ret_bh = new_bh; 2928 new_bh = NULL; 2929 2930 end: 2931 brelse(new_bh); 2932 return ret; 2933 } 2934 2935 /* 2936 * ocfs2_xattr_block_set() 2937 * 2938 * Set, replace or remove an extended attribute into 
external block. 2939 * 2940 */ 2941 static int ocfs2_xattr_block_set(struct inode *inode, 2942 struct ocfs2_xattr_info *xi, 2943 struct ocfs2_xattr_search *xs, 2944 struct ocfs2_xattr_set_ctxt *ctxt) 2945 { 2946 struct buffer_head *new_bh = NULL; 2947 struct ocfs2_xattr_block *xblk = NULL; 2948 int ret; 2949 struct ocfs2_xa_loc loc; 2950 2951 if (!xs->xattr_bh) { 2952 ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt, 2953 0, &new_bh); 2954 if (ret) { 2955 mlog_errno(ret); 2956 goto end; 2957 } 2958 2959 xs->xattr_bh = new_bh; 2960 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2961 xs->header = &xblk->xb_attrs.xb_header; 2962 xs->base = (void *)xs->header; 2963 xs->end = (void *)xblk + inode->i_sb->s_blocksize; 2964 xs->here = xs->header->xh_entries; 2965 } else 2966 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2967 2968 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { 2969 ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh, 2970 xs->not_found ? NULL : xs->here); 2971 2972 ret = ocfs2_xa_set(&loc, xi, ctxt); 2973 if (!ret) 2974 xs->here = loc.xl_entry; 2975 else if ((ret != -ENOSPC) || ctxt->set_abort) 2976 goto end; 2977 else { 2978 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); 2979 if (ret) 2980 goto end; 2981 } 2982 } 2983 2984 if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED) 2985 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt); 2986 2987 end: 2988 return ret; 2989 } 2990 2991 /* Check whether the new xattr can be inserted into the inode. 
*/ 2992 static int ocfs2_xattr_can_be_in_inode(struct inode *inode, 2993 struct ocfs2_xattr_info *xi, 2994 struct ocfs2_xattr_search *xs) 2995 { 2996 struct ocfs2_xattr_entry *last; 2997 int free, i; 2998 size_t min_offs = xs->end - xs->base; 2999 3000 if (!xs->header) 3001 return 0; 3002 3003 last = xs->header->xh_entries; 3004 3005 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 3006 size_t offs = le16_to_cpu(last->xe_name_offset); 3007 if (offs < min_offs) 3008 min_offs = offs; 3009 last += 1; 3010 } 3011 3012 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; 3013 if (free < 0) 3014 return 0; 3015 3016 BUG_ON(!xs->not_found); 3017 3018 if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi))) 3019 return 1; 3020 3021 return 0; 3022 } 3023 3024 static int ocfs2_calc_xattr_set_need(struct inode *inode, 3025 struct ocfs2_dinode *di, 3026 struct ocfs2_xattr_info *xi, 3027 struct ocfs2_xattr_search *xis, 3028 struct ocfs2_xattr_search *xbs, 3029 int *clusters_need, 3030 int *meta_need, 3031 int *credits_need) 3032 { 3033 int ret = 0, old_in_xb = 0; 3034 int clusters_add = 0, meta_add = 0, credits = 0; 3035 struct buffer_head *bh = NULL; 3036 struct ocfs2_xattr_block *xb = NULL; 3037 struct ocfs2_xattr_entry *xe = NULL; 3038 struct ocfs2_xattr_value_root *xv = NULL; 3039 char *base = NULL; 3040 int name_offset, name_len = 0; 3041 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3042 xi->xi_value_len); 3043 u64 value_size; 3044 3045 /* 3046 * Calculate the clusters we need to write. 3047 * No matter whether we replace an old one or add a new one, 3048 * we need this for writing. 
3049 */ 3050 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 3051 credits += new_clusters * 3052 ocfs2_clusters_to_blocks(inode->i_sb, 1); 3053 3054 if (xis->not_found && xbs->not_found) { 3055 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3056 3057 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3058 clusters_add += new_clusters; 3059 credits += ocfs2_calc_extend_credits(inode->i_sb, 3060 &def_xv.xv.xr_list); 3061 } 3062 3063 goto meta_guess; 3064 } 3065 3066 if (!xis->not_found) { 3067 xe = xis->here; 3068 name_offset = le16_to_cpu(xe->xe_name_offset); 3069 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3070 base = xis->base; 3071 credits += OCFS2_INODE_UPDATE_CREDITS; 3072 } else { 3073 int i, block_off = 0; 3074 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3075 xe = xbs->here; 3076 name_offset = le16_to_cpu(xe->xe_name_offset); 3077 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3078 i = xbs->here - xbs->header->xh_entries; 3079 old_in_xb = 1; 3080 3081 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3082 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3083 bucket_xh(xbs->bucket), 3084 i, &block_off, 3085 &name_offset); 3086 base = bucket_block(xbs->bucket, block_off); 3087 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3088 } else { 3089 base = xbs->base; 3090 credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS; 3091 } 3092 } 3093 3094 /* 3095 * delete a xattr doesn't need metadata and cluster allocation. 3096 * so just calculate the credits and return. 3097 * 3098 * The credits for removing the value tree will be extended 3099 * by ocfs2_remove_extent itself. 3100 */ 3101 if (!xi->xi_value) { 3102 if (!ocfs2_xattr_is_local(xe)) 3103 credits += ocfs2_remove_extent_credits(inode->i_sb); 3104 3105 goto out; 3106 } 3107 3108 /* do cluster allocation guess first. 
*/ 3109 value_size = le64_to_cpu(xe->xe_value_size); 3110 3111 if (old_in_xb) { 3112 /* 3113 * In xattr set, we always try to set the xe in inode first, 3114 * so if it can be inserted into inode successfully, the old 3115 * one will be removed from the xattr block, and this xattr 3116 * will be inserted into inode as a new xattr in inode. 3117 */ 3118 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { 3119 clusters_add += new_clusters; 3120 credits += ocfs2_remove_extent_credits(inode->i_sb) + 3121 OCFS2_INODE_UPDATE_CREDITS; 3122 if (!ocfs2_xattr_is_local(xe)) 3123 credits += ocfs2_calc_extend_credits( 3124 inode->i_sb, 3125 &def_xv.xv.xr_list); 3126 goto out; 3127 } 3128 } 3129 3130 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3131 /* the new values will be stored outside. */ 3132 u32 old_clusters = 0; 3133 3134 if (!ocfs2_xattr_is_local(xe)) { 3135 old_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3136 value_size); 3137 xv = (struct ocfs2_xattr_value_root *) 3138 (base + name_offset + name_len); 3139 value_size = OCFS2_XATTR_ROOT_SIZE; 3140 } else 3141 xv = &def_xv.xv; 3142 3143 if (old_clusters >= new_clusters) { 3144 credits += ocfs2_remove_extent_credits(inode->i_sb); 3145 goto out; 3146 } else { 3147 meta_add += ocfs2_extend_meta_needed(&xv->xr_list); 3148 clusters_add += new_clusters - old_clusters; 3149 credits += ocfs2_calc_extend_credits(inode->i_sb, 3150 &xv->xr_list); 3151 if (value_size >= OCFS2_XATTR_ROOT_SIZE) 3152 goto out; 3153 } 3154 } else { 3155 /* 3156 * Now the new value will be stored inside. So if the new 3157 * value is smaller than the size of value root or the old 3158 * value, we don't need any allocation, otherwise we have 3159 * to guess metadata allocation. 3160 */ 3161 if ((ocfs2_xattr_is_local(xe) && 3162 (value_size >= xi->xi_value_len)) || 3163 (!ocfs2_xattr_is_local(xe) && 3164 OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len)) 3165 goto out; 3166 } 3167 3168 meta_guess: 3169 /* calculate metadata allocation. 
*/ 3170 if (di->i_xattr_loc) { 3171 if (!xbs->xattr_bh) { 3172 ret = ocfs2_read_xattr_block(inode, 3173 le64_to_cpu(di->i_xattr_loc), 3174 &bh); 3175 if (ret) { 3176 mlog_errno(ret); 3177 goto out; 3178 } 3179 3180 xb = (struct ocfs2_xattr_block *)bh->b_data; 3181 } else 3182 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3183 3184 /* 3185 * If there is already an xattr tree, good, we can calculate 3186 * like other b-trees. Otherwise we may have the chance of 3187 * create a tree, the credit calculation is borrowed from 3188 * ocfs2_calc_extend_credits with root_el = NULL. And the 3189 * new tree will be cluster based, so no meta is needed. 3190 */ 3191 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3192 struct ocfs2_extent_list *el = 3193 &xb->xb_attrs.xb_root.xt_list; 3194 meta_add += ocfs2_extend_meta_needed(el); 3195 credits += ocfs2_calc_extend_credits(inode->i_sb, 3196 el); 3197 } else 3198 credits += OCFS2_SUBALLOC_ALLOC + 1; 3199 3200 /* 3201 * This cluster will be used either for new bucket or for 3202 * new xattr block. 3203 * If the cluster size is the same as the bucket size, one 3204 * more is needed since we may need to extend the bucket 3205 * also. 
3206 */ 3207 clusters_add += 1; 3208 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3209 if (OCFS2_XATTR_BUCKET_SIZE == 3210 OCFS2_SB(inode->i_sb)->s_clustersize) { 3211 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3212 clusters_add += 1; 3213 } 3214 } else { 3215 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 3216 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3217 struct ocfs2_extent_list *el = &def_xv.xv.xr_list; 3218 meta_add += ocfs2_extend_meta_needed(el); 3219 credits += ocfs2_calc_extend_credits(inode->i_sb, 3220 el); 3221 } else { 3222 meta_add += 1; 3223 } 3224 } 3225 out: 3226 if (clusters_need) 3227 *clusters_need = clusters_add; 3228 if (meta_need) 3229 *meta_need = meta_add; 3230 if (credits_need) 3231 *credits_need = credits; 3232 brelse(bh); 3233 return ret; 3234 } 3235 3236 static int ocfs2_init_xattr_set_ctxt(struct inode *inode, 3237 struct ocfs2_dinode *di, 3238 struct ocfs2_xattr_info *xi, 3239 struct ocfs2_xattr_search *xis, 3240 struct ocfs2_xattr_search *xbs, 3241 struct ocfs2_xattr_set_ctxt *ctxt, 3242 int extra_meta, 3243 int *credits) 3244 { 3245 int clusters_add, meta_add, ret; 3246 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3247 3248 memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt)); 3249 3250 ocfs2_init_dealloc_ctxt(&ctxt->dealloc); 3251 3252 ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, 3253 &clusters_add, &meta_add, credits); 3254 if (ret) { 3255 mlog_errno(ret); 3256 return ret; 3257 } 3258 3259 meta_add += extra_meta; 3260 trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add, 3261 clusters_add, *credits); 3262 3263 if (meta_add) { 3264 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, 3265 &ctxt->meta_ac); 3266 if (ret) { 3267 mlog_errno(ret); 3268 goto out; 3269 } 3270 } 3271 3272 if (clusters_add) { 3273 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac); 3274 if (ret) 3275 mlog_errno(ret); 3276 } 3277 out: 3278 if (ret) { 3279 if (ctxt->meta_ac) { 3280 
ocfs2_free_alloc_context(ctxt->meta_ac);
			ctxt->meta_ac = NULL;
		}

		/*
		 * We cannot have an error and a non null ctxt->data_ac.
		 */
	}

	return ret;
}

/*
 * Apply one set/replace/remove request inside the transaction carried
 * by ctxt->handle.
 *
 * xis and xbs carry the results of the inode-body and xattr-block
 * lookups done by the caller; a zero ->not_found means the name was
 * found in that location.
 *
 *  - xi->xi_value == NULL: remove the entry from where it was found
 *    (inode body first, otherwise the external block).
 *  - otherwise: try the inode body first.  If that succeeds and an old
 *    copy lives in the external block, the old copy is removed.  If the
 *    inode body returns -ENOSPC and ctxt->set_abort is clear, the value
 *    is written to the external block and an old inode-body copy, if
 *    any, is removed afterwards.
 *
 * Before each follow-up step the credits are recomputed with
 * ocfs2_calc_xattr_set_need() and the transaction is extended.
 *
 * Side effects visible to the caller: xi->xi_value and xi->xi_value_len
 * are cleared on the "move" paths, and xbs->not_found is overwritten
 * with -ENODATA (and not restored) when an inode-body copy is removed
 * after a block set.
 *
 * On success the ctime is refreshed in both the in-memory inode and the
 * dinode and the inode buffer is journaled.  Returns 0 on success,
 * otherwise the error reported by the failing helper.
 */
static int __ocfs2_xattr_set_handle(struct inode *inode,
				    struct ocfs2_dinode *di,
				    struct ocfs2_xattr_info *xi,
				    struct ocfs2_xattr_search *xis,
				    struct ocfs2_xattr_search *xbs,
				    struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret = 0, credits, old_found;

	if (!xi->xi_value) {
		/* Remove the existing extended attribute. */
		if (!xis->not_found)
			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		else if (!xbs->not_found)
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
	} else {
		/* We always try to store the attribute in the inode first. */
		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		if (!ret && !xbs->not_found) {
			/*
			 * The inode-body set succeeded and an old copy of the
			 * attribute exists in the external block: remove it.
			 */
			xi->xi_value = NULL;
			xi->xi_value_len = 0;

			/*
			 * Temporarily mark the inode-body search as "not
			 * found" while the credits are calculated, then
			 * restore the real lookup result.
			 */
			old_found = xis->not_found;
			xis->not_found = -ENODATA;
			ret = ocfs2_calc_xattr_set_need(inode,
							di,
							xi,
							xis,
							xbs,
							NULL,
							NULL,
							&credits);
			xis->not_found = old_found;
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_extend_trans(ctxt->handle, credits);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
		} else if ((ret == -ENOSPC) && !ctxt->set_abort) {
			/*
			 * An xattr block exists on disk but has not been
			 * looked up yet: search it now and extend the
			 * transaction for the block set below.
			 */
			if (di->i_xattr_loc && !xbs->xattr_bh) {
				ret = ocfs2_xattr_block_find(inode,
							     xi->xi_name_index,
							     xi->xi_name, xbs);
				if (ret)
					goto out;

				old_found = xis->not_found;
				xis->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				xis->not_found = old_found;
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
			}
			/*
			 * No space in the inode, so store the extended
			 * attribute in the external block instead.
			 */
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
			if (ret)
				goto out;
			if (!xis->not_found) {
				/*
				 * The block set succeeded and an old copy of
				 * the attribute exists in the inode: remove it.
				 */
				xi->xi_value = NULL;
				xi->xi_value_len = 0;
				/* Note: xbs->not_found is not restored. */
				xbs->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
				ret = ocfs2_xattr_ibody_set(inode, xi,
							    xis, ctxt);
			}
		}
	}

	if (!ret) {
		/* Update inode ctime. */
		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
					      xis->inode_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		inode->i_ctime = CURRENT_TIME;
		di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
		di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
	}
out:
	return ret;
}

/*
 * This function is only called during inode creation, to initialise
 * the security/acl xattrs of the new inode.
 * All transaction credits have been reserved in mknod.
 *
 * The caller supplies the running transaction (handle), the inode
 * buffer (di_bh) and the allocation contexts; none of them are released
 * here.  The flags argument is not examined by this function.
 */
int ocfs2_xattr_set_handle(handle_t *handle,
			   struct inode *inode,
			   struct buffer_head *di_bh,
			   int name_index,
			   const char *name,
			   const void *value,
			   size_t value_len,
			   int flags,
			   struct ocfs2_alloc_context *meta_ac,
			   struct ocfs2_alloc_context *data_ac)
{
	struct ocfs2_dinode *di;
	int ret;

	struct ocfs2_xattr_info xi = {
		.xi_name_index = name_index,
		.xi_name = name,
		.xi_name_len = strlen(name),
		.xi_value = value,
		.xi_value_len = value_len,
	};

	/* Both searches start out as "not found". */
	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_set_ctxt ctxt = {
		.handle = handle,
		.meta_ac = meta_ac,
		.data_ac = data_ac,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	/*
	 * In an extreme situation we may need an xattr bucket when the
	 * block size is too small.  The credits for the bucket have
	 * already been reserved in mknod.
	 */
	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
		xbs.bucket = ocfs2_xattr_bucket_new(inode);
		if (!xbs.bucket) {
			mlog_errno(-ENOMEM);
			return -ENOMEM;
		}
	}

	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_write(&OCFS2_I(inode)->ip_xattr_sem);

	/* Look in the inode body first, then in the external block. */
	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
	if (ret)
		goto cleanup;
	if (xis.not_found) {
		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
		if (ret)
			goto cleanup;
	}

	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);

cleanup:
	up_write(&OCFS2_I(inode)->ip_xattr_sem);
	brelse(xbs.xattr_bh);
	/*
	 * xbs.bucket is still NULL when the block size is not the minimum;
	 * NOTE(review): ocfs2_xattr_bucket_free() presumably accepts NULL -
	 * confirm against its definition.
	 */
	ocfs2_xattr_bucket_free(xbs.bucket);

	return ret;
}

/*
 * ocfs2_xattr_set()
 *
 * Set, replace or remove an extended attribute for this inode.
 * value is NULL to remove an existing extended attribute, else either
 * create or replace an extended attribute.
3515 */ 3516 int ocfs2_xattr_set(struct inode *inode, 3517 int name_index, 3518 const char *name, 3519 const void *value, 3520 size_t value_len, 3521 int flags) 3522 { 3523 struct buffer_head *di_bh = NULL; 3524 struct ocfs2_dinode *di; 3525 int ret, credits, ref_meta = 0, ref_credits = 0; 3526 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3527 struct inode *tl_inode = osb->osb_tl_inode; 3528 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; 3529 struct ocfs2_refcount_tree *ref_tree = NULL; 3530 3531 struct ocfs2_xattr_info xi = { 3532 .xi_name_index = name_index, 3533 .xi_name = name, 3534 .xi_name_len = strlen(name), 3535 .xi_value = value, 3536 .xi_value_len = value_len, 3537 }; 3538 3539 struct ocfs2_xattr_search xis = { 3540 .not_found = -ENODATA, 3541 }; 3542 3543 struct ocfs2_xattr_search xbs = { 3544 .not_found = -ENODATA, 3545 }; 3546 3547 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3548 return -EOPNOTSUPP; 3549 3550 /* 3551 * Only xbs will be used on indexed trees. xis doesn't need a 3552 * bucket. 3553 */ 3554 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3555 if (!xbs.bucket) { 3556 mlog_errno(-ENOMEM); 3557 return -ENOMEM; 3558 } 3559 3560 ret = ocfs2_inode_lock(inode, &di_bh, 1); 3561 if (ret < 0) { 3562 mlog_errno(ret); 3563 goto cleanup_nolock; 3564 } 3565 xis.inode_bh = xbs.inode_bh = di_bh; 3566 di = (struct ocfs2_dinode *)di_bh->b_data; 3567 3568 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3569 /* 3570 * Scan inode and external block to find the same name 3571 * extended attribute and collect search information. 
3572 */ 3573 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3574 if (ret) 3575 goto cleanup; 3576 if (xis.not_found) { 3577 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3578 if (ret) 3579 goto cleanup; 3580 } 3581 3582 if (xis.not_found && xbs.not_found) { 3583 ret = -ENODATA; 3584 if (flags & XATTR_REPLACE) 3585 goto cleanup; 3586 ret = 0; 3587 if (!value) 3588 goto cleanup; 3589 } else { 3590 ret = -EEXIST; 3591 if (flags & XATTR_CREATE) 3592 goto cleanup; 3593 } 3594 3595 /* Check whether the value is refcounted and do some preparation. */ 3596 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL && 3597 (!xis.not_found || !xbs.not_found)) { 3598 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3599 &xis, &xbs, &ref_tree, 3600 &ref_meta, &ref_credits); 3601 if (ret) { 3602 mlog_errno(ret); 3603 goto cleanup; 3604 } 3605 } 3606 3607 mutex_lock(&tl_inode->i_mutex); 3608 3609 if (ocfs2_truncate_log_needs_flush(osb)) { 3610 ret = __ocfs2_flush_truncate_log(osb); 3611 if (ret < 0) { 3612 mutex_unlock(&tl_inode->i_mutex); 3613 mlog_errno(ret); 3614 goto cleanup; 3615 } 3616 } 3617 mutex_unlock(&tl_inode->i_mutex); 3618 3619 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3620 &xbs, &ctxt, ref_meta, &credits); 3621 if (ret) { 3622 mlog_errno(ret); 3623 goto cleanup; 3624 } 3625 3626 /* we need to update inode's ctime field, so add credit for it. 
*/ 3627 credits += OCFS2_INODE_UPDATE_CREDITS; 3628 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 3629 if (IS_ERR(ctxt.handle)) { 3630 ret = PTR_ERR(ctxt.handle); 3631 mlog_errno(ret); 3632 goto out_free_ac; 3633 } 3634 3635 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3636 ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0); 3637 3638 ocfs2_commit_trans(osb, ctxt.handle); 3639 3640 out_free_ac: 3641 if (ctxt.data_ac) 3642 ocfs2_free_alloc_context(ctxt.data_ac); 3643 if (ctxt.meta_ac) 3644 ocfs2_free_alloc_context(ctxt.meta_ac); 3645 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3646 ocfs2_schedule_truncate_log_flush(osb, 1); 3647 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3648 3649 cleanup: 3650 if (ref_tree) 3651 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3652 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3653 if (!value && !ret) { 3654 ret = ocfs2_try_remove_refcount_tree(inode, di_bh); 3655 if (ret) 3656 mlog_errno(ret); 3657 } 3658 ocfs2_inode_unlock(inode, 1); 3659 cleanup_nolock: 3660 brelse(di_bh); 3661 brelse(xbs.xattr_bh); 3662 ocfs2_xattr_bucket_free(xbs.bucket); 3663 3664 return ret; 3665 } 3666 3667 /* 3668 * Find the xattr extent rec which may contains name_hash. 3669 * e_cpos will be the first name hash of the xattr rec. 3670 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. 
3671 */ 3672 static int ocfs2_xattr_get_rec(struct inode *inode, 3673 u32 name_hash, 3674 u64 *p_blkno, 3675 u32 *e_cpos, 3676 u32 *num_clusters, 3677 struct ocfs2_extent_list *el) 3678 { 3679 int ret = 0, i; 3680 struct buffer_head *eb_bh = NULL; 3681 struct ocfs2_extent_block *eb; 3682 struct ocfs2_extent_rec *rec = NULL; 3683 u64 e_blkno = 0; 3684 3685 if (el->l_tree_depth) { 3686 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3687 &eb_bh); 3688 if (ret) { 3689 mlog_errno(ret); 3690 goto out; 3691 } 3692 3693 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3694 el = &eb->h_list; 3695 3696 if (el->l_tree_depth) { 3697 ocfs2_error(inode->i_sb, 3698 "Inode %lu has non zero tree depth in " 3699 "xattr tree block %llu\n", inode->i_ino, 3700 (unsigned long long)eb_bh->b_blocknr); 3701 ret = -EROFS; 3702 goto out; 3703 } 3704 } 3705 3706 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3707 rec = &el->l_recs[i]; 3708 3709 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3710 e_blkno = le64_to_cpu(rec->e_blkno); 3711 break; 3712 } 3713 } 3714 3715 if (!e_blkno) { 3716 ocfs2_error(inode->i_sb, "Inode %lu has bad extent " 3717 "record (%u, %u, 0) in xattr", inode->i_ino, 3718 le32_to_cpu(rec->e_cpos), 3719 ocfs2_rec_clusters(el, rec)); 3720 ret = -EROFS; 3721 goto out; 3722 } 3723 3724 *p_blkno = le64_to_cpu(rec->e_blkno); 3725 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3726 if (e_cpos) 3727 *e_cpos = le32_to_cpu(rec->e_cpos); 3728 out: 3729 brelse(eb_bh); 3730 return ret; 3731 } 3732 3733 typedef int (xattr_bucket_func)(struct inode *inode, 3734 struct ocfs2_xattr_bucket *bucket, 3735 void *para); 3736 3737 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3738 struct ocfs2_xattr_bucket *bucket, 3739 int name_index, 3740 const char *name, 3741 u32 name_hash, 3742 u16 *xe_index, 3743 int *found) 3744 { 3745 int i, ret = 0, cmp = 1, block_off, new_offset; 3746 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3747 size_t name_len = 
strlen(name); 3748 struct ocfs2_xattr_entry *xe = NULL; 3749 char *xe_name; 3750 3751 /* 3752 * We don't use binary search in the bucket because there 3753 * may be multiple entries with the same name hash. 3754 */ 3755 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3756 xe = &xh->xh_entries[i]; 3757 3758 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3759 continue; 3760 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3761 break; 3762 3763 cmp = name_index - ocfs2_xattr_get_type(xe); 3764 if (!cmp) 3765 cmp = name_len - xe->xe_name_len; 3766 if (cmp) 3767 continue; 3768 3769 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3770 xh, 3771 i, 3772 &block_off, 3773 &new_offset); 3774 if (ret) { 3775 mlog_errno(ret); 3776 break; 3777 } 3778 3779 3780 xe_name = bucket_block(bucket, block_off) + new_offset; 3781 if (!memcmp(name, xe_name, name_len)) { 3782 *xe_index = i; 3783 *found = 1; 3784 ret = 0; 3785 break; 3786 } 3787 } 3788 3789 return ret; 3790 } 3791 3792 /* 3793 * Find the specified xattr entry in a series of buckets. 3794 * This series start from p_blkno and last for num_clusters. 3795 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3796 * the num of the valid buckets. 3797 * 3798 * Return the buffer_head this xattr should reside in. And if the xattr's 3799 * hash is in the gap of 2 buckets, return the lower bucket. 
3800 */ 3801 static int ocfs2_xattr_bucket_find(struct inode *inode, 3802 int name_index, 3803 const char *name, 3804 u32 name_hash, 3805 u64 p_blkno, 3806 u32 first_hash, 3807 u32 num_clusters, 3808 struct ocfs2_xattr_search *xs) 3809 { 3810 int ret, found = 0; 3811 struct ocfs2_xattr_header *xh = NULL; 3812 struct ocfs2_xattr_entry *xe = NULL; 3813 u16 index = 0; 3814 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3815 int low_bucket = 0, bucket, high_bucket; 3816 struct ocfs2_xattr_bucket *search; 3817 u32 last_hash; 3818 u64 blkno, lower_blkno = 0; 3819 3820 search = ocfs2_xattr_bucket_new(inode); 3821 if (!search) { 3822 ret = -ENOMEM; 3823 mlog_errno(ret); 3824 goto out; 3825 } 3826 3827 ret = ocfs2_read_xattr_bucket(search, p_blkno); 3828 if (ret) { 3829 mlog_errno(ret); 3830 goto out; 3831 } 3832 3833 xh = bucket_xh(search); 3834 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 3835 while (low_bucket <= high_bucket) { 3836 ocfs2_xattr_bucket_relse(search); 3837 3838 bucket = (low_bucket + high_bucket) / 2; 3839 blkno = p_blkno + bucket * blk_per_bucket; 3840 ret = ocfs2_read_xattr_bucket(search, blkno); 3841 if (ret) { 3842 mlog_errno(ret); 3843 goto out; 3844 } 3845 3846 xh = bucket_xh(search); 3847 xe = &xh->xh_entries[0]; 3848 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3849 high_bucket = bucket - 1; 3850 continue; 3851 } 3852 3853 /* 3854 * Check whether the hash of the last entry in our 3855 * bucket is larger than the search one. for an empty 3856 * bucket, the last one is also the first one. 3857 */ 3858 if (xh->xh_count) 3859 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; 3860 3861 last_hash = le32_to_cpu(xe->xe_name_hash); 3862 3863 /* record lower_blkno which may be the insert place. */ 3864 lower_blkno = blkno; 3865 3866 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3867 low_bucket = bucket + 1; 3868 continue; 3869 } 3870 3871 /* the searched xattr should reside in this bucket if exists. 
*/ 3872 ret = ocfs2_find_xe_in_bucket(inode, search, 3873 name_index, name, name_hash, 3874 &index, &found); 3875 if (ret) { 3876 mlog_errno(ret); 3877 goto out; 3878 } 3879 break; 3880 } 3881 3882 /* 3883 * Record the bucket we have found. 3884 * When the xattr's hash value is in the gap of 2 buckets, we will 3885 * always set it to the previous bucket. 3886 */ 3887 if (!lower_blkno) 3888 lower_blkno = p_blkno; 3889 3890 /* This should be in cache - we just read it during the search */ 3891 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); 3892 if (ret) { 3893 mlog_errno(ret); 3894 goto out; 3895 } 3896 3897 xs->header = bucket_xh(xs->bucket); 3898 xs->base = bucket_block(xs->bucket, 0); 3899 xs->end = xs->base + inode->i_sb->s_blocksize; 3900 3901 if (found) { 3902 xs->here = &xs->header->xh_entries[index]; 3903 trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno, 3904 name, name_index, name_hash, 3905 (unsigned long long)bucket_blkno(xs->bucket), 3906 index); 3907 } else 3908 ret = -ENODATA; 3909 3910 out: 3911 ocfs2_xattr_bucket_free(search); 3912 return ret; 3913 } 3914 3915 static int ocfs2_xattr_index_block_find(struct inode *inode, 3916 struct buffer_head *root_bh, 3917 int name_index, 3918 const char *name, 3919 struct ocfs2_xattr_search *xs) 3920 { 3921 int ret; 3922 struct ocfs2_xattr_block *xb = 3923 (struct ocfs2_xattr_block *)root_bh->b_data; 3924 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 3925 struct ocfs2_extent_list *el = &xb_root->xt_list; 3926 u64 p_blkno = 0; 3927 u32 first_hash, num_clusters = 0; 3928 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 3929 3930 if (le16_to_cpu(el->l_next_free_rec) == 0) 3931 return -ENODATA; 3932 3933 trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno, 3934 name, name_index, name_hash, 3935 (unsigned long long)root_bh->b_blocknr, 3936 -1); 3937 3938 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3939 &num_clusters, el); 3940 if (ret) { 
3941 mlog_errno(ret); 3942 goto out; 3943 } 3944 3945 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 3946 3947 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno, 3948 name, name_index, first_hash, 3949 (unsigned long long)p_blkno, 3950 num_clusters); 3951 3952 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3953 p_blkno, first_hash, num_clusters, xs); 3954 3955 out: 3956 return ret; 3957 } 3958 3959 static int ocfs2_iterate_xattr_buckets(struct inode *inode, 3960 u64 blkno, 3961 u32 clusters, 3962 xattr_bucket_func *func, 3963 void *para) 3964 { 3965 int i, ret = 0; 3966 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3967 u32 num_buckets = clusters * bpc; 3968 struct ocfs2_xattr_bucket *bucket; 3969 3970 bucket = ocfs2_xattr_bucket_new(inode); 3971 if (!bucket) { 3972 mlog_errno(-ENOMEM); 3973 return -ENOMEM; 3974 } 3975 3976 trace_ocfs2_iterate_xattr_buckets( 3977 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3978 (unsigned long long)blkno, clusters); 3979 3980 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 3981 ret = ocfs2_read_xattr_bucket(bucket, blkno); 3982 if (ret) { 3983 mlog_errno(ret); 3984 break; 3985 } 3986 3987 /* 3988 * The real bucket num in this series of blocks is stored 3989 * in the 1st bucket. 
3990 */ 3991 if (i == 0) 3992 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 3993 3994 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno, 3995 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 3996 if (func) { 3997 ret = func(inode, bucket, para); 3998 if (ret && ret != -ERANGE) 3999 mlog_errno(ret); 4000 /* Fall through to bucket_relse() */ 4001 } 4002 4003 ocfs2_xattr_bucket_relse(bucket); 4004 if (ret) 4005 break; 4006 } 4007 4008 ocfs2_xattr_bucket_free(bucket); 4009 return ret; 4010 } 4011 4012 struct ocfs2_xattr_tree_list { 4013 char *buffer; 4014 size_t buffer_size; 4015 size_t result; 4016 }; 4017 4018 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 4019 struct ocfs2_xattr_header *xh, 4020 int index, 4021 int *block_off, 4022 int *new_offset) 4023 { 4024 u16 name_offset; 4025 4026 if (index < 0 || index >= le16_to_cpu(xh->xh_count)) 4027 return -EINVAL; 4028 4029 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 4030 4031 *block_off = name_offset >> sb->s_blocksize_bits; 4032 *new_offset = name_offset % sb->s_blocksize; 4033 4034 return 0; 4035 } 4036 4037 static int ocfs2_list_xattr_bucket(struct inode *inode, 4038 struct ocfs2_xattr_bucket *bucket, 4039 void *para) 4040 { 4041 int ret = 0, type; 4042 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; 4043 int i, block_off, new_offset; 4044 const char *prefix, *name; 4045 4046 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { 4047 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; 4048 type = ocfs2_xattr_get_type(entry); 4049 prefix = ocfs2_xattr_prefix(type); 4050 4051 if (prefix) { 4052 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 4053 bucket_xh(bucket), 4054 i, 4055 &block_off, 4056 &new_offset); 4057 if (ret) 4058 break; 4059 4060 name = (const char *)bucket_block(bucket, block_off) + 4061 new_offset; 4062 ret = ocfs2_xattr_list_entry(xl->buffer, 4063 xl->buffer_size, 
4064 &xl->result, 4065 prefix, name, 4066 entry->xe_name_len); 4067 if (ret) 4068 break; 4069 } 4070 } 4071 4072 return ret; 4073 } 4074 4075 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 4076 struct buffer_head *blk_bh, 4077 xattr_tree_rec_func *rec_func, 4078 void *para) 4079 { 4080 struct ocfs2_xattr_block *xb = 4081 (struct ocfs2_xattr_block *)blk_bh->b_data; 4082 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; 4083 int ret = 0; 4084 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 4085 u64 p_blkno = 0; 4086 4087 if (!el->l_next_free_rec || !rec_func) 4088 return 0; 4089 4090 while (name_hash > 0) { 4091 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 4092 &e_cpos, &num_clusters, el); 4093 if (ret) { 4094 mlog_errno(ret); 4095 break; 4096 } 4097 4098 ret = rec_func(inode, blk_bh, p_blkno, e_cpos, 4099 num_clusters, para); 4100 if (ret) { 4101 if (ret != -ERANGE) 4102 mlog_errno(ret); 4103 break; 4104 } 4105 4106 if (e_cpos == 0) 4107 break; 4108 4109 name_hash = e_cpos - 1; 4110 } 4111 4112 return ret; 4113 4114 } 4115 4116 static int ocfs2_list_xattr_tree_rec(struct inode *inode, 4117 struct buffer_head *root_bh, 4118 u64 blkno, u32 cpos, u32 len, void *para) 4119 { 4120 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 4121 ocfs2_list_xattr_bucket, para); 4122 } 4123 4124 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 4125 struct buffer_head *blk_bh, 4126 char *buffer, 4127 size_t buffer_size) 4128 { 4129 int ret; 4130 struct ocfs2_xattr_tree_list xl = { 4131 .buffer = buffer, 4132 .buffer_size = buffer_size, 4133 .result = 0, 4134 }; 4135 4136 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 4137 ocfs2_list_xattr_tree_rec, &xl); 4138 if (ret) { 4139 mlog_errno(ret); 4140 goto out; 4141 } 4142 4143 ret = xl.result; 4144 out: 4145 return ret; 4146 } 4147 4148 static int cmp_xe(const void *a, const void *b) 4149 { 4150 const struct ocfs2_xattr_entry *l = a, *r = b; 4151 u32 l_hash = 
le32_to_cpu(l->xe_name_hash); 4152 u32 r_hash = le32_to_cpu(r->xe_name_hash); 4153 4154 if (l_hash > r_hash) 4155 return 1; 4156 if (l_hash < r_hash) 4157 return -1; 4158 return 0; 4159 } 4160 4161 static void swap_xe(void *a, void *b, int size) 4162 { 4163 struct ocfs2_xattr_entry *l = a, *r = b, tmp; 4164 4165 tmp = *l; 4166 memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); 4167 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); 4168 } 4169 4170 /* 4171 * When the ocfs2_xattr_block is filled up, new bucket will be created 4172 * and all the xattr entries will be moved to the new bucket. 4173 * The header goes at the start of the bucket, and the names+values are 4174 * filled from the end. This is why *target starts as the last buffer. 4175 * Note: we need to sort the entries since they are not saved in order 4176 * in the ocfs2_xattr_block. 4177 */ 4178 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 4179 struct buffer_head *xb_bh, 4180 struct ocfs2_xattr_bucket *bucket) 4181 { 4182 int i, blocksize = inode->i_sb->s_blocksize; 4183 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4184 u16 offset, size, off_change; 4185 struct ocfs2_xattr_entry *xe; 4186 struct ocfs2_xattr_block *xb = 4187 (struct ocfs2_xattr_block *)xb_bh->b_data; 4188 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 4189 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 4190 u16 count = le16_to_cpu(xb_xh->xh_count); 4191 char *src = xb_bh->b_data; 4192 char *target = bucket_block(bucket, blks - 1); 4193 4194 trace_ocfs2_cp_xattr_block_to_bucket_begin( 4195 (unsigned long long)xb_bh->b_blocknr, 4196 (unsigned long long)bucket_blkno(bucket)); 4197 4198 for (i = 0; i < blks; i++) 4199 memset(bucket_block(bucket, i), 0, blocksize); 4200 4201 /* 4202 * Since the xe_name_offset is based on ocfs2_xattr_header, 4203 * there is a offset change corresponding to the change of 4204 * ocfs2_xattr_header's position. 
4205 */ 4206 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4207 xe = &xb_xh->xh_entries[count - 1]; 4208 offset = le16_to_cpu(xe->xe_name_offset) + off_change; 4209 size = blocksize - offset; 4210 4211 /* copy all the names and values. */ 4212 memcpy(target + offset, src + offset, size); 4213 4214 /* Init new header now. */ 4215 xh->xh_count = xb_xh->xh_count; 4216 xh->xh_num_buckets = cpu_to_le16(1); 4217 xh->xh_name_value_len = cpu_to_le16(size); 4218 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 4219 4220 /* copy all the entries. */ 4221 target = bucket_block(bucket, 0); 4222 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 4223 size = count * sizeof(struct ocfs2_xattr_entry); 4224 memcpy(target + offset, (char *)xb_xh + offset, size); 4225 4226 /* Change the xe offset for all the xe because of the move. */ 4227 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + 4228 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4229 for (i = 0; i < count; i++) 4230 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4231 4232 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); 4233 4234 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4235 cmp_xe, swap_xe); 4236 } 4237 4238 /* 4239 * After we move xattr from block to index btree, we have to 4240 * update ocfs2_xattr_search to the new xe and base. 4241 * 4242 * When the entry is in xattr block, xattr_bh indicates the storage place. 4243 * While if the entry is in index b-tree, "bucket" indicates the 4244 * real place of the xattr. 
4245 */ 4246 static void ocfs2_xattr_update_xattr_search(struct inode *inode, 4247 struct ocfs2_xattr_search *xs, 4248 struct buffer_head *old_bh) 4249 { 4250 char *buf = old_bh->b_data; 4251 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 4252 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 4253 int i; 4254 4255 xs->header = bucket_xh(xs->bucket); 4256 xs->base = bucket_block(xs->bucket, 0); 4257 xs->end = xs->base + inode->i_sb->s_blocksize; 4258 4259 if (xs->not_found) 4260 return; 4261 4262 i = xs->here - old_xh->xh_entries; 4263 xs->here = &xs->header->xh_entries[i]; 4264 } 4265 4266 static int ocfs2_xattr_create_index_block(struct inode *inode, 4267 struct ocfs2_xattr_search *xs, 4268 struct ocfs2_xattr_set_ctxt *ctxt) 4269 { 4270 int ret; 4271 u32 bit_off, len; 4272 u64 blkno; 4273 handle_t *handle = ctxt->handle; 4274 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4275 struct buffer_head *xb_bh = xs->xattr_bh; 4276 struct ocfs2_xattr_block *xb = 4277 (struct ocfs2_xattr_block *)xb_bh->b_data; 4278 struct ocfs2_xattr_tree_root *xr; 4279 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4280 4281 trace_ocfs2_xattr_create_index_block_begin( 4282 (unsigned long long)xb_bh->b_blocknr); 4283 4284 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4285 BUG_ON(!xs->bucket); 4286 4287 /* 4288 * XXX: 4289 * We can use this lock for now, and maybe move to a dedicated mutex 4290 * if performance becomes a problem later. 
4291 */ 4292 down_write(&oi->ip_alloc_sem); 4293 4294 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, 4295 OCFS2_JOURNAL_ACCESS_WRITE); 4296 if (ret) { 4297 mlog_errno(ret); 4298 goto out; 4299 } 4300 4301 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 4302 1, 1, &bit_off, &len); 4303 if (ret) { 4304 mlog_errno(ret); 4305 goto out; 4306 } 4307 4308 /* 4309 * The bucket may spread in many blocks, and 4310 * we will only touch the 1st block and the last block 4311 * in the whole bucket(one for entry and one for data). 4312 */ 4313 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4314 4315 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); 4316 4317 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1); 4318 if (ret) { 4319 mlog_errno(ret); 4320 goto out; 4321 } 4322 4323 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 4324 OCFS2_JOURNAL_ACCESS_CREATE); 4325 if (ret) { 4326 mlog_errno(ret); 4327 goto out; 4328 } 4329 4330 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); 4331 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 4332 4333 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); 4334 4335 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 4336 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 4337 offsetof(struct ocfs2_xattr_block, xb_attrs)); 4338 4339 xr = &xb->xb_attrs.xb_root; 4340 xr->xt_clusters = cpu_to_le32(1); 4341 xr->xt_last_eb_blk = 0; 4342 xr->xt_list.l_tree_depth = 0; 4343 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); 4344 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 4345 4346 xr->xt_list.l_recs[0].e_cpos = 0; 4347 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 4348 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 4349 4350 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 4351 4352 ocfs2_journal_dirty(handle, xb_bh); 4353 4354 out: 4355 up_write(&oi->ip_alloc_sem); 4356 4357 return ret; 4358 } 4359 4360 static int 
cmp_xe_offset(const void *a, const void *b) 4361 { 4362 const struct ocfs2_xattr_entry *l = a, *r = b; 4363 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4364 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4365 4366 if (l_name_offset < r_name_offset) 4367 return 1; 4368 if (l_name_offset > r_name_offset) 4369 return -1; 4370 return 0; 4371 } 4372 4373 /* 4374 * defrag a xattr bucket if we find that the bucket has some 4375 * holes beteen name/value pairs. 4376 * We will move all the name/value pairs to the end of the bucket 4377 * so that we can spare some space for insertion. 4378 */ 4379 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4380 handle_t *handle, 4381 struct ocfs2_xattr_bucket *bucket) 4382 { 4383 int ret, i; 4384 size_t end, offset, len; 4385 struct ocfs2_xattr_header *xh; 4386 char *entries, *buf, *bucket_buf = NULL; 4387 u64 blkno = bucket_blkno(bucket); 4388 u16 xh_free_start; 4389 size_t blocksize = inode->i_sb->s_blocksize; 4390 struct ocfs2_xattr_entry *xe; 4391 4392 /* 4393 * In order to make the operation more efficient and generic, 4394 * we copy all the blocks into a contiguous memory and do the 4395 * defragment there, so if anything is error, we will not touch 4396 * the real block. 
4397 */ 4398 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4399 if (!bucket_buf) { 4400 ret = -EIO; 4401 goto out; 4402 } 4403 4404 buf = bucket_buf; 4405 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4406 memcpy(buf, bucket_block(bucket, i), blocksize); 4407 4408 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4409 OCFS2_JOURNAL_ACCESS_WRITE); 4410 if (ret < 0) { 4411 mlog_errno(ret); 4412 goto out; 4413 } 4414 4415 xh = (struct ocfs2_xattr_header *)bucket_buf; 4416 entries = (char *)xh->xh_entries; 4417 xh_free_start = le16_to_cpu(xh->xh_free_start); 4418 4419 trace_ocfs2_defrag_xattr_bucket( 4420 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4421 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4422 4423 /* 4424 * sort all the entries by their offset. 4425 * the largest will be the first, so that we can 4426 * move them to the end one by one. 4427 */ 4428 sort(entries, le16_to_cpu(xh->xh_count), 4429 sizeof(struct ocfs2_xattr_entry), 4430 cmp_xe_offset, swap_xe); 4431 4432 /* Move all name/values to the end of the bucket. */ 4433 xe = xh->xh_entries; 4434 end = OCFS2_XATTR_BUCKET_SIZE; 4435 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4436 offset = le16_to_cpu(xe->xe_name_offset); 4437 len = namevalue_size_xe(xe); 4438 4439 /* 4440 * We must make sure that the name/value pair 4441 * exist in the same block. So adjust end to 4442 * the previous block end if needed. 
4443 */ 4444 if (((end - len) / blocksize != 4445 (end - 1) / blocksize)) 4446 end = end - end % blocksize; 4447 4448 if (end > offset + len) { 4449 memmove(bucket_buf + end - len, 4450 bucket_buf + offset, len); 4451 xe->xe_name_offset = cpu_to_le16(end - len); 4452 } 4453 4454 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4455 "bucket %llu\n", (unsigned long long)blkno); 4456 4457 end -= len; 4458 } 4459 4460 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4461 "bucket %llu\n", (unsigned long long)blkno); 4462 4463 if (xh_free_start == end) 4464 goto out; 4465 4466 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4467 xh->xh_free_start = cpu_to_le16(end); 4468 4469 /* sort the entries by their name_hash. */ 4470 sort(entries, le16_to_cpu(xh->xh_count), 4471 sizeof(struct ocfs2_xattr_entry), 4472 cmp_xe, swap_xe); 4473 4474 buf = bucket_buf; 4475 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4476 memcpy(bucket_block(bucket, i), buf, blocksize); 4477 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4478 4479 out: 4480 kfree(bucket_buf); 4481 return ret; 4482 } 4483 4484 /* 4485 * prev_blkno points to the start of an existing extent. new_blkno 4486 * points to a newly allocated extent. Because we know each of our 4487 * clusters contains more than bucket, we can easily split one cluster 4488 * at a bucket boundary. So we take the last cluster of the existing 4489 * extent and split it down the middle. We move the last half of the 4490 * buckets in the last cluster of the existing extent over to the new 4491 * extent. 4492 * 4493 * first_bh is the buffer at prev_blkno so we can update the existing 4494 * extent's bucket count. header_bh is the bucket were we were hoping 4495 * to insert our xattr. If the bucket move places the target in the new 4496 * extent, we'll update first_bh and header_bh after modifying the old 4497 * extent. 
4498 * 4499 * first_hash will be set as the 1st xe's name_hash in the new extent. 4500 */ 4501 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 4502 handle_t *handle, 4503 struct ocfs2_xattr_bucket *first, 4504 struct ocfs2_xattr_bucket *target, 4505 u64 new_blkno, 4506 u32 num_clusters, 4507 u32 *first_hash) 4508 { 4509 int ret; 4510 struct super_block *sb = inode->i_sb; 4511 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); 4512 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); 4513 int to_move = num_buckets / 2; 4514 u64 src_blkno; 4515 u64 last_cluster_blkno = bucket_blkno(first) + 4516 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); 4517 4518 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4519 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4520 4521 trace_ocfs2_mv_xattr_bucket_cross_cluster( 4522 (unsigned long long)last_cluster_blkno, 4523 (unsigned long long)new_blkno); 4524 4525 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4526 last_cluster_blkno, new_blkno, 4527 to_move, first_hash); 4528 if (ret) { 4529 mlog_errno(ret); 4530 goto out; 4531 } 4532 4533 /* This is the first bucket that got moved */ 4534 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); 4535 4536 /* 4537 * If the target bucket was part of the moved buckets, we need to 4538 * update first and target. 4539 */ 4540 if (bucket_blkno(target) >= src_blkno) { 4541 /* Find the block for the new target bucket */ 4542 src_blkno = new_blkno + 4543 (bucket_blkno(target) - src_blkno); 4544 4545 ocfs2_xattr_bucket_relse(first); 4546 ocfs2_xattr_bucket_relse(target); 4547 4548 /* 4549 * These shouldn't fail - the buffers are in the 4550 * journal from ocfs2_cp_xattr_bucket(). 
4551 */ 4552 ret = ocfs2_read_xattr_bucket(first, new_blkno); 4553 if (ret) { 4554 mlog_errno(ret); 4555 goto out; 4556 } 4557 ret = ocfs2_read_xattr_bucket(target, src_blkno); 4558 if (ret) 4559 mlog_errno(ret); 4560 4561 } 4562 4563 out: 4564 return ret; 4565 } 4566 4567 /* 4568 * Find the suitable pos when we divide a bucket into 2. 4569 * We have to make sure the xattrs with the same hash value exist 4570 * in the same bucket. 4571 * 4572 * If this ocfs2_xattr_header covers more than one hash value, find a 4573 * place where the hash value changes. Try to find the most even split. 4574 * The most common case is that all entries have different hash values, 4575 * and the first check we make will find a place to split. 4576 */ 4577 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) 4578 { 4579 struct ocfs2_xattr_entry *entries = xh->xh_entries; 4580 int count = le16_to_cpu(xh->xh_count); 4581 int delta, middle = count / 2; 4582 4583 /* 4584 * We start at the middle. Each step gets farther away in both 4585 * directions. We therefore hit the change in hash value 4586 * nearest to the middle. Note that this loop does not execute for 4587 * count < 2. 4588 */ 4589 for (delta = 0; delta < middle; delta++) { 4590 /* Let's check delta earlier than middle */ 4591 if (cmp_xe(&entries[middle - delta - 1], 4592 &entries[middle - delta])) 4593 return middle - delta; 4594 4595 /* For even counts, don't walk off the end */ 4596 if ((middle + delta + 1) == count) 4597 continue; 4598 4599 /* Now try delta past middle */ 4600 if (cmp_xe(&entries[middle + delta], 4601 &entries[middle + delta + 1])) 4602 return middle + delta + 1; 4603 } 4604 4605 /* Every entry had the same hash */ 4606 return count; 4607 } 4608 4609 /* 4610 * Move some xattrs in old bucket(blk) to new bucket(new_blk). 4611 * first_hash will record the 1st hash of the new bucket. 4612 * 4613 * Normally half of the xattrs will be moved. 
But we have to make 4614 * sure that the xattrs with the same hash value are stored in the 4615 * same bucket. If all the xattrs in this bucket have the same hash 4616 * value, the new bucket will be initialized as an empty one and the 4617 * first_hash will be initialized as (hash_value+1). 4618 */ 4619 static int ocfs2_divide_xattr_bucket(struct inode *inode, 4620 handle_t *handle, 4621 u64 blk, 4622 u64 new_blk, 4623 u32 *first_hash, 4624 int new_bucket_head) 4625 { 4626 int ret, i; 4627 int count, start, len, name_value_len = 0, name_offset = 0; 4628 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4629 struct ocfs2_xattr_header *xh; 4630 struct ocfs2_xattr_entry *xe; 4631 int blocksize = inode->i_sb->s_blocksize; 4632 4633 trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk, 4634 (unsigned long long)new_blk); 4635 4636 s_bucket = ocfs2_xattr_bucket_new(inode); 4637 t_bucket = ocfs2_xattr_bucket_new(inode); 4638 if (!s_bucket || !t_bucket) { 4639 ret = -ENOMEM; 4640 mlog_errno(ret); 4641 goto out; 4642 } 4643 4644 ret = ocfs2_read_xattr_bucket(s_bucket, blk); 4645 if (ret) { 4646 mlog_errno(ret); 4647 goto out; 4648 } 4649 4650 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, 4651 OCFS2_JOURNAL_ACCESS_WRITE); 4652 if (ret) { 4653 mlog_errno(ret); 4654 goto out; 4655 } 4656 4657 /* 4658 * Even if !new_bucket_head, we're overwriting t_bucket. Thus, 4659 * there's no need to read it. 4660 */ 4661 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head); 4662 if (ret) { 4663 mlog_errno(ret); 4664 goto out; 4665 } 4666 4667 /* 4668 * Hey, if we're overwriting t_bucket, what difference does 4669 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the 4670 * same part of ocfs2_cp_xattr_bucket(). 4671 */ 4672 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4673 new_bucket_head ? 
4674 OCFS2_JOURNAL_ACCESS_CREATE : 4675 OCFS2_JOURNAL_ACCESS_WRITE); 4676 if (ret) { 4677 mlog_errno(ret); 4678 goto out; 4679 } 4680 4681 xh = bucket_xh(s_bucket); 4682 count = le16_to_cpu(xh->xh_count); 4683 start = ocfs2_xattr_find_divide_pos(xh); 4684 4685 if (start == count) { 4686 xe = &xh->xh_entries[start-1]; 4687 4688 /* 4689 * initialized a new empty bucket here. 4690 * The hash value is set as one larger than 4691 * that of the last entry in the previous bucket. 4692 */ 4693 for (i = 0; i < t_bucket->bu_blocks; i++) 4694 memset(bucket_block(t_bucket, i), 0, blocksize); 4695 4696 xh = bucket_xh(t_bucket); 4697 xh->xh_free_start = cpu_to_le16(blocksize); 4698 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 4699 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 4700 4701 goto set_num_buckets; 4702 } 4703 4704 /* copy the whole bucket to the new first. */ 4705 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4706 4707 /* update the new bucket. */ 4708 xh = bucket_xh(t_bucket); 4709 4710 /* 4711 * Calculate the total name/value len and xh_free_start for 4712 * the old bucket first. 4713 */ 4714 name_offset = OCFS2_XATTR_BUCKET_SIZE; 4715 name_value_len = 0; 4716 for (i = 0; i < start; i++) { 4717 xe = &xh->xh_entries[i]; 4718 name_value_len += namevalue_size_xe(xe); 4719 if (le16_to_cpu(xe->xe_name_offset) < name_offset) 4720 name_offset = le16_to_cpu(xe->xe_name_offset); 4721 } 4722 4723 /* 4724 * Now begin the modification to the new bucket. 4725 * 4726 * In the new bucket, We just move the xattr entry to the beginning 4727 * and don't touch the name/value. So there will be some holes in the 4728 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is 4729 * called. 
4730 */ 4731 xe = &xh->xh_entries[start]; 4732 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4733 trace_ocfs2_divide_xattr_bucket_move(len, 4734 (int)((char *)xe - (char *)xh), 4735 (int)((char *)xh->xh_entries - (char *)xh)); 4736 memmove((char *)xh->xh_entries, (char *)xe, len); 4737 xe = &xh->xh_entries[count - start]; 4738 len = sizeof(struct ocfs2_xattr_entry) * start; 4739 memset((char *)xe, 0, len); 4740 4741 le16_add_cpu(&xh->xh_count, -start); 4742 le16_add_cpu(&xh->xh_name_value_len, -name_value_len); 4743 4744 /* Calculate xh_free_start for the new bucket. */ 4745 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 4746 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 4747 xe = &xh->xh_entries[i]; 4748 if (le16_to_cpu(xe->xe_name_offset) < 4749 le16_to_cpu(xh->xh_free_start)) 4750 xh->xh_free_start = xe->xe_name_offset; 4751 } 4752 4753 set_num_buckets: 4754 /* set xh->xh_num_buckets for the new xh. */ 4755 if (new_bucket_head) 4756 xh->xh_num_buckets = cpu_to_le16(1); 4757 else 4758 xh->xh_num_buckets = 0; 4759 4760 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4761 4762 /* store the first_hash of the new bucket. */ 4763 if (first_hash) 4764 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4765 4766 /* 4767 * Now only update the 1st block of the old bucket. If we 4768 * just added a new empty bucket, there is no need to modify 4769 * it. 4770 */ 4771 if (start == count) 4772 goto out; 4773 4774 xh = bucket_xh(s_bucket); 4775 memset(&xh->xh_entries[start], 0, 4776 sizeof(struct ocfs2_xattr_entry) * (count - start)); 4777 xh->xh_count = cpu_to_le16(start); 4778 xh->xh_free_start = cpu_to_le16(name_offset); 4779 xh->xh_name_value_len = cpu_to_le16(name_value_len); 4780 4781 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); 4782 4783 out: 4784 ocfs2_xattr_bucket_free(s_bucket); 4785 ocfs2_xattr_bucket_free(t_bucket); 4786 4787 return ret; 4788 } 4789 4790 /* 4791 * Copy xattr from one bucket to another bucket. 
4792 * 4793 * The caller must make sure that the journal transaction 4794 * has enough space for journaling. 4795 */ 4796 static int ocfs2_cp_xattr_bucket(struct inode *inode, 4797 handle_t *handle, 4798 u64 s_blkno, 4799 u64 t_blkno, 4800 int t_is_new) 4801 { 4802 int ret; 4803 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4804 4805 BUG_ON(s_blkno == t_blkno); 4806 4807 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno, 4808 (unsigned long long)t_blkno, 4809 t_is_new); 4810 4811 s_bucket = ocfs2_xattr_bucket_new(inode); 4812 t_bucket = ocfs2_xattr_bucket_new(inode); 4813 if (!s_bucket || !t_bucket) { 4814 ret = -ENOMEM; 4815 mlog_errno(ret); 4816 goto out; 4817 } 4818 4819 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); 4820 if (ret) 4821 goto out; 4822 4823 /* 4824 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4825 * there's no need to read it. 4826 */ 4827 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new); 4828 if (ret) 4829 goto out; 4830 4831 /* 4832 * Hey, if we're overwriting t_bucket, what difference does 4833 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new 4834 * cluster to fill, we came here from 4835 * ocfs2_mv_xattr_buckets(), and it is really new - 4836 * ACCESS_CREATE is required. But we also might have moved data 4837 * out of t_bucket before extending back into it. 4838 * ocfs2_add_new_xattr_bucket() can do this - its call to 4839 * ocfs2_add_new_xattr_cluster() may have created a new extent 4840 * and copied out the end of the old extent. Then it re-extends 4841 * the old extent back to create space for new xattrs. That's 4842 * how we get here, and the bucket isn't really new. 4843 */ 4844 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4845 t_is_new ? 
4846 OCFS2_JOURNAL_ACCESS_CREATE : 4847 OCFS2_JOURNAL_ACCESS_WRITE); 4848 if (ret) 4849 goto out; 4850 4851 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4852 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4853 4854 out: 4855 ocfs2_xattr_bucket_free(t_bucket); 4856 ocfs2_xattr_bucket_free(s_bucket); 4857 4858 return ret; 4859 } 4860 4861 /* 4862 * src_blk points to the start of an existing extent. last_blk points to 4863 * last cluster in that extent. to_blk points to a newly allocated 4864 * extent. We copy the buckets from the cluster at last_blk to the new 4865 * extent. If start_bucket is non-zero, we skip that many buckets before 4866 * we start copying. The new extent's xh_num_buckets gets set to the 4867 * number of buckets we copied. The old extent's xh_num_buckets shrinks 4868 * by the same amount. 4869 */ 4870 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 4871 u64 src_blk, u64 last_blk, u64 to_blk, 4872 unsigned int start_bucket, 4873 u32 *first_hash) 4874 { 4875 int i, ret, credits; 4876 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4877 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4878 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4879 struct ocfs2_xattr_bucket *old_first, *new_first; 4880 4881 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk, 4882 (unsigned long long)to_blk); 4883 4884 BUG_ON(start_bucket >= num_buckets); 4885 if (start_bucket) { 4886 num_buckets -= start_bucket; 4887 last_blk += (start_bucket * blks_per_bucket); 4888 } 4889 4890 /* The first bucket of the original extent */ 4891 old_first = ocfs2_xattr_bucket_new(inode); 4892 /* The first bucket of the new extent */ 4893 new_first = ocfs2_xattr_bucket_new(inode); 4894 if (!old_first || !new_first) { 4895 ret = -ENOMEM; 4896 mlog_errno(ret); 4897 goto out; 4898 } 4899 4900 ret = ocfs2_read_xattr_bucket(old_first, src_blk); 4901 if (ret) { 4902 mlog_errno(ret); 4903 goto out; 4904 } 4905 4906 /* 4907 * We 
need to update the first bucket of the old extent and all 4908 * the buckets going to the new extent. 4909 */ 4910 credits = ((num_buckets + 1) * blks_per_bucket); 4911 ret = ocfs2_extend_trans(handle, credits); 4912 if (ret) { 4913 mlog_errno(ret); 4914 goto out; 4915 } 4916 4917 ret = ocfs2_xattr_bucket_journal_access(handle, old_first, 4918 OCFS2_JOURNAL_ACCESS_WRITE); 4919 if (ret) { 4920 mlog_errno(ret); 4921 goto out; 4922 } 4923 4924 for (i = 0; i < num_buckets; i++) { 4925 ret = ocfs2_cp_xattr_bucket(inode, handle, 4926 last_blk + (i * blks_per_bucket), 4927 to_blk + (i * blks_per_bucket), 4928 1); 4929 if (ret) { 4930 mlog_errno(ret); 4931 goto out; 4932 } 4933 } 4934 4935 /* 4936 * Get the new bucket ready before we dirty anything 4937 * (This actually shouldn't fail, because we already dirtied 4938 * it once in ocfs2_cp_xattr_bucket()). 4939 */ 4940 ret = ocfs2_read_xattr_bucket(new_first, to_blk); 4941 if (ret) { 4942 mlog_errno(ret); 4943 goto out; 4944 } 4945 ret = ocfs2_xattr_bucket_journal_access(handle, new_first, 4946 OCFS2_JOURNAL_ACCESS_WRITE); 4947 if (ret) { 4948 mlog_errno(ret); 4949 goto out; 4950 } 4951 4952 /* Now update the headers */ 4953 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); 4954 ocfs2_xattr_bucket_journal_dirty(handle, old_first); 4955 4956 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); 4957 ocfs2_xattr_bucket_journal_dirty(handle, new_first); 4958 4959 if (first_hash) 4960 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); 4961 4962 out: 4963 ocfs2_xattr_bucket_free(new_first); 4964 ocfs2_xattr_bucket_free(old_first); 4965 return ret; 4966 } 4967 4968 /* 4969 * Move some xattrs in this cluster to the new cluster. 4970 * This function should only be called when bucket size == cluster size. 4971 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 
4972 */ 4973 static int ocfs2_divide_xattr_cluster(struct inode *inode, 4974 handle_t *handle, 4975 u64 prev_blk, 4976 u64 new_blk, 4977 u32 *first_hash) 4978 { 4979 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4980 int ret, credits = 2 * blk_per_bucket; 4981 4982 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4983 4984 ret = ocfs2_extend_trans(handle, credits); 4985 if (ret) { 4986 mlog_errno(ret); 4987 return ret; 4988 } 4989 4990 /* Move half of the xattr in start_blk to the next bucket. */ 4991 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, 4992 new_blk, first_hash, 1); 4993 } 4994 4995 /* 4996 * Move some xattrs from the old cluster to the new one since they are not 4997 * contiguous in ocfs2 xattr tree. 4998 * 4999 * new_blk starts a new separate cluster, and we will move some xattrs from 5000 * prev_blk to it. v_start will be set as the first name hash value in this 5001 * new cluster so that it can be used as e_cpos during tree insertion and 5002 * don't collide with our original b-tree operations. first_bh and header_bh 5003 * will also be updated since they will be used in ocfs2_extend_xattr_bucket 5004 * to extend the insert bucket. 5005 * 5006 * The problem is how much xattr should we move to the new one and when should 5007 * we update first_bh and header_bh? 5008 * 1. If cluster size > bucket size, that means the previous cluster has more 5009 * than 1 bucket, so just move half nums of bucket into the new cluster and 5010 * update the first_bh and header_bh if the insert bucket has been moved 5011 * to the new cluster. 5012 * 2. If cluster_size == bucket_size: 5013 * a) If the previous extent rec has more than one cluster and the insert 5014 * place isn't in the last cluster, copy the entire last cluster to the 5015 * new one. This time, we don't need to upate the first_bh and header_bh 5016 * since they will not be moved into the new cluster. 
5017 * b) Otherwise, move the bottom half of the xattrs in the last cluster into 5018 * the new one. And we set the extend flag to zero if the insert place is 5019 * moved into the new allocated cluster since no extend is needed. 5020 */ 5021 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 5022 handle_t *handle, 5023 struct ocfs2_xattr_bucket *first, 5024 struct ocfs2_xattr_bucket *target, 5025 u64 new_blk, 5026 u32 prev_clusters, 5027 u32 *v_start, 5028 int *extend) 5029 { 5030 int ret; 5031 5032 trace_ocfs2_adjust_xattr_cross_cluster( 5033 (unsigned long long)bucket_blkno(first), 5034 (unsigned long long)new_blk, prev_clusters); 5035 5036 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5037 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5038 handle, 5039 first, target, 5040 new_blk, 5041 prev_clusters, 5042 v_start); 5043 if (ret) 5044 mlog_errno(ret); 5045 } else { 5046 /* The start of the last cluster in the first extent */ 5047 u64 last_blk = bucket_blkno(first) + 5048 ((prev_clusters - 1) * 5049 ocfs2_clusters_to_blocks(inode->i_sb, 1)); 5050 5051 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { 5052 ret = ocfs2_mv_xattr_buckets(inode, handle, 5053 bucket_blkno(first), 5054 last_blk, new_blk, 0, 5055 v_start); 5056 if (ret) 5057 mlog_errno(ret); 5058 } else { 5059 ret = ocfs2_divide_xattr_cluster(inode, handle, 5060 last_blk, new_blk, 5061 v_start); 5062 if (ret) 5063 mlog_errno(ret); 5064 5065 if ((bucket_blkno(target) == last_blk) && extend) 5066 *extend = 0; 5067 } 5068 } 5069 5070 return ret; 5071 } 5072 5073 /* 5074 * Add a new cluster for xattr storage. 5075 * 5076 * If the new cluster is contiguous with the previous one, it will be 5077 * appended to the same extent record, and num_clusters will be updated. 5078 * If not, we will insert a new extent for it and move some xattrs in 5079 * the last cluster into the new allocated one. 
5080 * We also need to limit the maximum size of a btree leaf, otherwise we'll 5081 * lose the benefits of hashing because we'll have to search large leaves. 5082 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, 5083 * if it's bigger). 5084 * 5085 * first_bh is the first block of the previous extent rec and header_bh 5086 * indicates the bucket we will insert the new xattrs. They will be updated 5087 * when the header_bh is moved into the new cluster. 5088 */ 5089 static int ocfs2_add_new_xattr_cluster(struct inode *inode, 5090 struct buffer_head *root_bh, 5091 struct ocfs2_xattr_bucket *first, 5092 struct ocfs2_xattr_bucket *target, 5093 u32 *num_clusters, 5094 u32 prev_cpos, 5095 int *extend, 5096 struct ocfs2_xattr_set_ctxt *ctxt) 5097 { 5098 int ret; 5099 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 5100 u32 prev_clusters = *num_clusters; 5101 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 5102 u64 block; 5103 handle_t *handle = ctxt->handle; 5104 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5105 struct ocfs2_extent_tree et; 5106 5107 trace_ocfs2_add_new_xattr_cluster_begin( 5108 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5109 (unsigned long long)bucket_blkno(first), 5110 prev_cpos, prev_clusters); 5111 5112 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5113 5114 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5115 OCFS2_JOURNAL_ACCESS_WRITE); 5116 if (ret < 0) { 5117 mlog_errno(ret); 5118 goto leave; 5119 } 5120 5121 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1, 5122 clusters_to_add, &bit_off, &num_bits); 5123 if (ret < 0) { 5124 if (ret != -ENOSPC) 5125 mlog_errno(ret); 5126 goto leave; 5127 } 5128 5129 BUG_ON(num_bits > clusters_to_add); 5130 5131 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5132 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits); 5133 5134 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5135 
(prev_clusters + num_bits) << osb->s_clustersize_bits <= 5136 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 5137 /* 5138 * If this cluster is contiguous with the old one and 5139 * adding this new cluster, we don't surpass the limit of 5140 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be 5141 * initialized and used like other buckets in the previous 5142 * cluster. 5143 * So add it as a contiguous one. The caller will handle 5144 * its init process. 5145 */ 5146 v_start = prev_cpos + prev_clusters; 5147 *num_clusters = prev_clusters + num_bits; 5148 } else { 5149 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5150 handle, 5151 first, 5152 target, 5153 block, 5154 prev_clusters, 5155 &v_start, 5156 extend); 5157 if (ret) { 5158 mlog_errno(ret); 5159 goto leave; 5160 } 5161 } 5162 5163 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block, 5164 v_start, num_bits); 5165 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5166 num_bits, 0, ctxt->meta_ac); 5167 if (ret < 0) { 5168 mlog_errno(ret); 5169 goto leave; 5170 } 5171 5172 ocfs2_journal_dirty(handle, root_bh); 5173 5174 leave: 5175 return ret; 5176 } 5177 5178 /* 5179 * We are given an extent. 'first' is the bucket at the very front of 5180 * the extent. The extent has space for an additional bucket past 5181 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number 5182 * of the target bucket. We wish to shift every bucket past the target 5183 * down one, filling in that additional space. When we get back to the 5184 * target, we split the target between itself and the now-empty bucket 5185 * at target+1 (aka, target_blkno + blks_per_bucket). 
5186 */ 5187 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5188 handle_t *handle, 5189 struct ocfs2_xattr_bucket *first, 5190 u64 target_blk, 5191 u32 num_clusters) 5192 { 5193 int ret, credits; 5194 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5195 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5196 u64 end_blk; 5197 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5198 5199 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk, 5200 (unsigned long long)bucket_blkno(first), 5201 num_clusters, new_bucket); 5202 5203 /* The extent must have room for an additional bucket */ 5204 BUG_ON(new_bucket >= 5205 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5206 5207 /* end_blk points to the last existing bucket */ 5208 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5209 5210 /* 5211 * end_blk is the start of the last existing bucket. 5212 * Thus, (end_blk - target_blk) covers the target bucket and 5213 * every bucket after it up to, but not including, the last 5214 * existing bucket. Then we add the last existing bucket, the 5215 * new bucket, and the first bucket (3 * blk_per_bucket). 5216 */ 5217 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5218 ret = ocfs2_extend_trans(handle, credits); 5219 if (ret) { 5220 mlog_errno(ret); 5221 goto out; 5222 } 5223 5224 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5225 OCFS2_JOURNAL_ACCESS_WRITE); 5226 if (ret) { 5227 mlog_errno(ret); 5228 goto out; 5229 } 5230 5231 while (end_blk != target_blk) { 5232 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5233 end_blk + blk_per_bucket, 0); 5234 if (ret) 5235 goto out; 5236 end_blk -= blk_per_bucket; 5237 } 5238 5239 /* Move half of the xattr in target_blkno to the next bucket. 
*/ 5240 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5241 target_blk + blk_per_bucket, NULL, 0); 5242 5243 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5244 ocfs2_xattr_bucket_journal_dirty(handle, first); 5245 5246 out: 5247 return ret; 5248 } 5249 5250 /* 5251 * Add new xattr bucket in an extent record and adjust the buckets 5252 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5253 * bucket we want to insert into. 5254 * 5255 * In the easy case, we will move all the buckets after target down by 5256 * one. Half of target's xattrs will be moved to the next bucket. 5257 * 5258 * If current cluster is full, we'll allocate a new one. This may not 5259 * be contiguous. The underlying calls will make sure that there is 5260 * space for the insert, shifting buckets around if necessary. 5261 * 'target' may be moved by those calls. 5262 */ 5263 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5264 struct buffer_head *xb_bh, 5265 struct ocfs2_xattr_bucket *target, 5266 struct ocfs2_xattr_set_ctxt *ctxt) 5267 { 5268 struct ocfs2_xattr_block *xb = 5269 (struct ocfs2_xattr_block *)xb_bh->b_data; 5270 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5271 struct ocfs2_extent_list *el = &xb_root->xt_list; 5272 u32 name_hash = 5273 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5274 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5275 int ret, num_buckets, extend = 1; 5276 u64 p_blkno; 5277 u32 e_cpos, num_clusters; 5278 /* The bucket at the front of the extent */ 5279 struct ocfs2_xattr_bucket *first; 5280 5281 trace_ocfs2_add_new_xattr_bucket( 5282 (unsigned long long)bucket_blkno(target)); 5283 5284 /* The first bucket of the original extent */ 5285 first = ocfs2_xattr_bucket_new(inode); 5286 if (!first) { 5287 ret = -ENOMEM; 5288 mlog_errno(ret); 5289 goto out; 5290 } 5291 5292 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5293 &num_clusters, el); 5294 if (ret) { 5295 mlog_errno(ret); 
5296 goto out; 5297 } 5298 5299 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5300 if (ret) { 5301 mlog_errno(ret); 5302 goto out; 5303 } 5304 5305 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5306 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5307 /* 5308 * This can move first+target if the target bucket moves 5309 * to the new extent. 5310 */ 5311 ret = ocfs2_add_new_xattr_cluster(inode, 5312 xb_bh, 5313 first, 5314 target, 5315 &num_clusters, 5316 e_cpos, 5317 &extend, 5318 ctxt); 5319 if (ret) { 5320 mlog_errno(ret); 5321 goto out; 5322 } 5323 } 5324 5325 if (extend) { 5326 ret = ocfs2_extend_xattr_bucket(inode, 5327 ctxt->handle, 5328 first, 5329 bucket_blkno(target), 5330 num_clusters); 5331 if (ret) 5332 mlog_errno(ret); 5333 } 5334 5335 out: 5336 ocfs2_xattr_bucket_free(first); 5337 5338 return ret; 5339 } 5340 5341 /* 5342 * Truncate the specified xe_off entry in xattr bucket. 5343 * bucket is indicated by header_bh and len is the new length. 5344 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5345 * 5346 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 5347 */ 5348 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5349 struct ocfs2_xattr_bucket *bucket, 5350 int xe_off, 5351 int len, 5352 struct ocfs2_xattr_set_ctxt *ctxt) 5353 { 5354 int ret, offset; 5355 u64 value_blk; 5356 struct ocfs2_xattr_entry *xe; 5357 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5358 size_t blocksize = inode->i_sb->s_blocksize; 5359 struct ocfs2_xattr_value_buf vb = { 5360 .vb_access = ocfs2_journal_access, 5361 }; 5362 5363 xe = &xh->xh_entries[xe_off]; 5364 5365 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5366 5367 offset = le16_to_cpu(xe->xe_name_offset) + 5368 OCFS2_XATTR_SIZE(xe->xe_name_len); 5369 5370 value_blk = offset / blocksize; 5371 5372 /* We don't allow ocfs2_xattr_value to be stored in different block. 
*/ 5373 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5374 5375 vb.vb_bh = bucket->bu_bhs[value_blk]; 5376 BUG_ON(!vb.vb_bh); 5377 5378 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5379 (vb.vb_bh->b_data + offset % blocksize); 5380 5381 /* 5382 * From here on out we have to dirty the bucket. The generic 5383 * value calls only modify one of the bucket's bhs, but we need 5384 * to send the bucket at once. So if they error, they *could* have 5385 * modified something. We have to assume they did, and dirty 5386 * the whole bucket. This leaves us in a consistent state. 5387 */ 5388 trace_ocfs2_xattr_bucket_value_truncate( 5389 (unsigned long long)bucket_blkno(bucket), xe_off, len); 5390 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5391 if (ret) { 5392 mlog_errno(ret); 5393 goto out; 5394 } 5395 5396 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5397 OCFS2_JOURNAL_ACCESS_WRITE); 5398 if (ret) { 5399 mlog_errno(ret); 5400 goto out; 5401 } 5402 5403 xe->xe_value_size = cpu_to_le64(len); 5404 5405 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5406 5407 out: 5408 return ret; 5409 } 5410 5411 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5412 struct buffer_head *root_bh, 5413 u64 blkno, 5414 u32 cpos, 5415 u32 len, 5416 void *para) 5417 { 5418 int ret; 5419 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5420 struct inode *tl_inode = osb->osb_tl_inode; 5421 handle_t *handle; 5422 struct ocfs2_xattr_block *xb = 5423 (struct ocfs2_xattr_block *)root_bh->b_data; 5424 struct ocfs2_alloc_context *meta_ac = NULL; 5425 struct ocfs2_cached_dealloc_ctxt dealloc; 5426 struct ocfs2_extent_tree et; 5427 5428 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5429 ocfs2_delete_xattr_in_bucket, para); 5430 if (ret) { 5431 mlog_errno(ret); 5432 return ret; 5433 } 5434 5435 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5436 5437 ocfs2_init_dealloc_ctxt(&dealloc); 5438 5439 
trace_ocfs2_rm_xattr_cluster( 5440 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5441 (unsigned long long)blkno, cpos, len); 5442 5443 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5444 len); 5445 5446 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5447 if (ret) { 5448 mlog_errno(ret); 5449 return ret; 5450 } 5451 5452 mutex_lock(&tl_inode->i_mutex); 5453 5454 if (ocfs2_truncate_log_needs_flush(osb)) { 5455 ret = __ocfs2_flush_truncate_log(osb); 5456 if (ret < 0) { 5457 mlog_errno(ret); 5458 goto out; 5459 } 5460 } 5461 5462 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5463 if (IS_ERR(handle)) { 5464 ret = -ENOMEM; 5465 mlog_errno(ret); 5466 goto out; 5467 } 5468 5469 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5470 OCFS2_JOURNAL_ACCESS_WRITE); 5471 if (ret) { 5472 mlog_errno(ret); 5473 goto out_commit; 5474 } 5475 5476 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5477 &dealloc); 5478 if (ret) { 5479 mlog_errno(ret); 5480 goto out_commit; 5481 } 5482 5483 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5484 ocfs2_journal_dirty(handle, root_bh); 5485 5486 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5487 if (ret) 5488 mlog_errno(ret); 5489 ocfs2_update_inode_fsync_trans(handle, inode, 0); 5490 5491 out_commit: 5492 ocfs2_commit_trans(osb, handle); 5493 out: 5494 ocfs2_schedule_truncate_log_flush(osb, 1); 5495 5496 mutex_unlock(&tl_inode->i_mutex); 5497 5498 if (meta_ac) 5499 ocfs2_free_alloc_context(meta_ac); 5500 5501 ocfs2_run_deallocs(osb, &dealloc); 5502 5503 return ret; 5504 } 5505 5506 /* 5507 * check whether the xattr bucket is filled up with the same hash value. 5508 * If we want to insert the xattr with the same hash, return -ENOSPC. 5509 * If we want to insert a xattr with different hash value, go ahead 5510 * and ocfs2_divide_xattr_bucket will handle this. 
5511 */ 5512 static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 5513 struct ocfs2_xattr_bucket *bucket, 5514 const char *name) 5515 { 5516 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5517 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5518 5519 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5520 return 0; 5521 5522 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 5523 xh->xh_entries[0].xe_name_hash) { 5524 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5525 "hash = %u\n", 5526 (unsigned long long)bucket_blkno(bucket), 5527 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5528 return -ENOSPC; 5529 } 5530 5531 return 0; 5532 } 5533 5534 /* 5535 * Try to set the entry in the current bucket. If we fail, the caller 5536 * will handle getting us another bucket. 5537 */ 5538 static int ocfs2_xattr_set_entry_bucket(struct inode *inode, 5539 struct ocfs2_xattr_info *xi, 5540 struct ocfs2_xattr_search *xs, 5541 struct ocfs2_xattr_set_ctxt *ctxt) 5542 { 5543 int ret; 5544 struct ocfs2_xa_loc loc; 5545 5546 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name); 5547 5548 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5549 xs->not_found ? NULL : xs->here); 5550 ret = ocfs2_xa_set(&loc, xi, ctxt); 5551 if (!ret) { 5552 xs->here = loc.xl_entry; 5553 goto out; 5554 } 5555 if (ret != -ENOSPC) { 5556 mlog_errno(ret); 5557 goto out; 5558 } 5559 5560 /* Ok, we need space. Let's try defragmenting the bucket. 
*/ 5561 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5562 xs->bucket); 5563 if (ret) { 5564 mlog_errno(ret); 5565 goto out; 5566 } 5567 5568 ret = ocfs2_xa_set(&loc, xi, ctxt); 5569 if (!ret) { 5570 xs->here = loc.xl_entry; 5571 goto out; 5572 } 5573 if (ret != -ENOSPC) 5574 mlog_errno(ret); 5575 5576 5577 out: 5578 return ret; 5579 } 5580 5581 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5582 struct ocfs2_xattr_info *xi, 5583 struct ocfs2_xattr_search *xs, 5584 struct ocfs2_xattr_set_ctxt *ctxt) 5585 { 5586 int ret; 5587 5588 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name); 5589 5590 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5591 if (!ret) 5592 goto out; 5593 if (ret != -ENOSPC) { 5594 mlog_errno(ret); 5595 goto out; 5596 } 5597 5598 /* Ack, need more space. Let's try to get another bucket! */ 5599 5600 /* 5601 * We do not allow for overlapping ranges between buckets. And 5602 * the maximum number of collisions we will allow for then is 5603 * one bucket's worth, so check it here whether we need to 5604 * add a new bucket for the insert. 5605 */ 5606 ret = ocfs2_check_xattr_bucket_collision(inode, 5607 xs->bucket, 5608 xi->xi_name); 5609 if (ret) { 5610 mlog_errno(ret); 5611 goto out; 5612 } 5613 5614 ret = ocfs2_add_new_xattr_bucket(inode, 5615 xs->xattr_bh, 5616 xs->bucket, 5617 ctxt); 5618 if (ret) { 5619 mlog_errno(ret); 5620 goto out; 5621 } 5622 5623 /* 5624 * ocfs2_add_new_xattr_bucket() will have updated 5625 * xs->bucket if it moved, but it will not have updated 5626 * any of the other search fields. Thus, we drop it and 5627 * re-search. Everything should be cached, so it'll be 5628 * quick. 
5629 */ 5630 ocfs2_xattr_bucket_relse(xs->bucket); 5631 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5632 xi->xi_name_index, 5633 xi->xi_name, xs); 5634 if (ret && ret != -ENODATA) 5635 goto out; 5636 xs->not_found = ret; 5637 5638 /* Ok, we have a new bucket, let's try again */ 5639 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5640 if (ret && (ret != -ENOSPC)) 5641 mlog_errno(ret); 5642 5643 out: 5644 return ret; 5645 } 5646 5647 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5648 struct ocfs2_xattr_bucket *bucket, 5649 void *para) 5650 { 5651 int ret = 0, ref_credits; 5652 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5653 u16 i; 5654 struct ocfs2_xattr_entry *xe; 5655 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5656 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5657 int credits = ocfs2_remove_extent_credits(osb->sb) + 5658 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5659 struct ocfs2_xattr_value_root *xv; 5660 struct ocfs2_rm_xattr_bucket_para *args = 5661 (struct ocfs2_rm_xattr_bucket_para *)para; 5662 5663 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5664 5665 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5666 xe = &xh->xh_entries[i]; 5667 if (ocfs2_xattr_is_local(xe)) 5668 continue; 5669 5670 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5671 i, &xv, NULL); 5672 if (ret) { 5673 mlog_errno(ret); 5674 break; 5675 } 5676 5677 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5678 args->ref_ci, 5679 args->ref_root_bh, 5680 &ctxt.meta_ac, 5681 &ref_credits); 5682 5683 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5684 if (IS_ERR(ctxt.handle)) { 5685 ret = PTR_ERR(ctxt.handle); 5686 mlog_errno(ret); 5687 break; 5688 } 5689 5690 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5691 i, 0, &ctxt); 5692 5693 ocfs2_commit_trans(osb, ctxt.handle); 5694 if (ctxt.meta_ac) { 5695 ocfs2_free_alloc_context(ctxt.meta_ac); 5696 ctxt.meta_ac = NULL; 5697 } 5698 if (ret) { 5699 mlog_errno(ret); 
5700 break; 5701 } 5702 } 5703 5704 if (ctxt.meta_ac) 5705 ocfs2_free_alloc_context(ctxt.meta_ac); 5706 ocfs2_schedule_truncate_log_flush(osb, 1); 5707 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5708 return ret; 5709 } 5710 5711 /* 5712 * Whenever we modify a xattr value root in the bucket(e.g, CoW 5713 * or change the extent record flag), we need to recalculate 5714 * the metaecc for the whole bucket. So it is done here. 5715 * 5716 * Note: 5717 * We have to give the extra credits for the caller. 5718 */ 5719 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, 5720 handle_t *handle, 5721 void *para) 5722 { 5723 int ret; 5724 struct ocfs2_xattr_bucket *bucket = 5725 (struct ocfs2_xattr_bucket *)para; 5726 5727 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 5728 OCFS2_JOURNAL_ACCESS_WRITE); 5729 if (ret) { 5730 mlog_errno(ret); 5731 return ret; 5732 } 5733 5734 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 5735 5736 return 0; 5737 } 5738 5739 /* 5740 * Special action we need if the xattr value is refcounted. 5741 * 5742 * 1. If the xattr is refcounted, lock the tree. 5743 * 2. CoW the xattr if we are setting the new value and the value 5744 * will be stored outside. 5745 * 3. In other case, decrease_refcount will work for us, so just 5746 * lock the refcount tree, calculate the meta and credits is OK. 5747 * 5748 * We have to do CoW before ocfs2_init_xattr_set_ctxt since 5749 * currently CoW is a completed transaction, while this function 5750 * will also lock the allocators and let us deadlock. So we will 5751 * CoW the whole xattr value. 
5752 */ 5753 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5754 struct ocfs2_dinode *di, 5755 struct ocfs2_xattr_info *xi, 5756 struct ocfs2_xattr_search *xis, 5757 struct ocfs2_xattr_search *xbs, 5758 struct ocfs2_refcount_tree **ref_tree, 5759 int *meta_add, 5760 int *credits) 5761 { 5762 int ret = 0; 5763 struct ocfs2_xattr_block *xb; 5764 struct ocfs2_xattr_entry *xe; 5765 char *base; 5766 u32 p_cluster, num_clusters; 5767 unsigned int ext_flags; 5768 int name_offset, name_len; 5769 struct ocfs2_xattr_value_buf vb; 5770 struct ocfs2_xattr_bucket *bucket = NULL; 5771 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5772 struct ocfs2_post_refcount refcount; 5773 struct ocfs2_post_refcount *p = NULL; 5774 struct buffer_head *ref_root_bh = NULL; 5775 5776 if (!xis->not_found) { 5777 xe = xis->here; 5778 name_offset = le16_to_cpu(xe->xe_name_offset); 5779 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5780 base = xis->base; 5781 vb.vb_bh = xis->inode_bh; 5782 vb.vb_access = ocfs2_journal_access_di; 5783 } else { 5784 int i, block_off = 0; 5785 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5786 xe = xbs->here; 5787 name_offset = le16_to_cpu(xe->xe_name_offset); 5788 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5789 i = xbs->here - xbs->header->xh_entries; 5790 5791 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5792 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5793 bucket_xh(xbs->bucket), 5794 i, &block_off, 5795 &name_offset); 5796 if (ret) { 5797 mlog_errno(ret); 5798 goto out; 5799 } 5800 base = bucket_block(xbs->bucket, block_off); 5801 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5802 vb.vb_access = ocfs2_journal_access; 5803 5804 if (ocfs2_meta_ecc(osb)) { 5805 /*create parameters for ocfs2_post_refcount. 
*/ 5806 bucket = xbs->bucket; 5807 refcount.credits = bucket->bu_blocks; 5808 refcount.para = bucket; 5809 refcount.func = 5810 ocfs2_xattr_bucket_post_refcount; 5811 p = &refcount; 5812 } 5813 } else { 5814 base = xbs->base; 5815 vb.vb_bh = xbs->xattr_bh; 5816 vb.vb_access = ocfs2_journal_access_xb; 5817 } 5818 } 5819 5820 if (ocfs2_xattr_is_local(xe)) 5821 goto out; 5822 5823 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5824 (base + name_offset + name_len); 5825 5826 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5827 &num_clusters, &vb.vb_xv->xr_list, 5828 &ext_flags); 5829 if (ret) { 5830 mlog_errno(ret); 5831 goto out; 5832 } 5833 5834 /* 5835 * We just need to check the 1st extent record, since we always 5836 * CoW the whole xattr. So there shouldn't be a xattr with 5837 * some REFCOUNT extent recs after the 1st one. 5838 */ 5839 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5840 goto out; 5841 5842 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5843 1, ref_tree, &ref_root_bh); 5844 if (ret) { 5845 mlog_errno(ret); 5846 goto out; 5847 } 5848 5849 /* 5850 * If we are deleting the xattr or the new size will be stored inside, 5851 * cool, leave it there, the xattr truncate process will remove them 5852 * for us(it still needs the refcount tree lock and the meta, credits). 5853 * And the worse case is that every cluster truncate will split the 5854 * refcount tree, and make the original extent become 3. So we will need 5855 * 2 * cluster more extent recs at most. 
5856 */ 5857 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5858 5859 ret = ocfs2_refcounted_xattr_delete_need(inode, 5860 &(*ref_tree)->rf_ci, 5861 ref_root_bh, vb.vb_xv, 5862 meta_add, credits); 5863 if (ret) 5864 mlog_errno(ret); 5865 goto out; 5866 } 5867 5868 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5869 *ref_tree, ref_root_bh, 0, 5870 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5871 if (ret) 5872 mlog_errno(ret); 5873 5874 out: 5875 brelse(ref_root_bh); 5876 return ret; 5877 } 5878 5879 /* 5880 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5881 * The physical clusters will be added to refcount tree. 5882 */ 5883 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5884 struct ocfs2_xattr_value_root *xv, 5885 struct ocfs2_extent_tree *value_et, 5886 struct ocfs2_caching_info *ref_ci, 5887 struct buffer_head *ref_root_bh, 5888 struct ocfs2_cached_dealloc_ctxt *dealloc, 5889 struct ocfs2_post_refcount *refcount) 5890 { 5891 int ret = 0; 5892 u32 clusters = le32_to_cpu(xv->xr_clusters); 5893 u32 cpos, p_cluster, num_clusters; 5894 struct ocfs2_extent_list *el = &xv->xr_list; 5895 unsigned int ext_flags; 5896 5897 cpos = 0; 5898 while (cpos < clusters) { 5899 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5900 &num_clusters, el, &ext_flags); 5901 if (ret) { 5902 mlog_errno(ret); 5903 break; 5904 } 5905 5906 cpos += num_clusters; 5907 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5908 continue; 5909 5910 BUG_ON(!p_cluster); 5911 5912 ret = ocfs2_add_refcount_flag(inode, value_et, 5913 ref_ci, ref_root_bh, 5914 cpos - num_clusters, 5915 p_cluster, num_clusters, 5916 dealloc, refcount); 5917 if (ret) { 5918 mlog_errno(ret); 5919 break; 5920 } 5921 } 5922 5923 return ret; 5924 } 5925 5926 /* 5927 * Given a normal ocfs2_xattr_header, refcount all the entries which 5928 * have value stored outside. 5929 * Used for xattrs stored in inode and ocfs2_xattr_block. 
5930 */ 5931 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5932 struct ocfs2_xattr_value_buf *vb, 5933 struct ocfs2_xattr_header *header, 5934 struct ocfs2_caching_info *ref_ci, 5935 struct buffer_head *ref_root_bh, 5936 struct ocfs2_cached_dealloc_ctxt *dealloc) 5937 { 5938 5939 struct ocfs2_xattr_entry *xe; 5940 struct ocfs2_xattr_value_root *xv; 5941 struct ocfs2_extent_tree et; 5942 int i, ret = 0; 5943 5944 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5945 xe = &header->xh_entries[i]; 5946 5947 if (ocfs2_xattr_is_local(xe)) 5948 continue; 5949 5950 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5951 le16_to_cpu(xe->xe_name_offset) + 5952 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5953 5954 vb->vb_xv = xv; 5955 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5956 5957 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 5958 ref_ci, ref_root_bh, 5959 dealloc, NULL); 5960 if (ret) { 5961 mlog_errno(ret); 5962 break; 5963 } 5964 } 5965 5966 return ret; 5967 } 5968 5969 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 5970 struct buffer_head *fe_bh, 5971 struct ocfs2_caching_info *ref_ci, 5972 struct buffer_head *ref_root_bh, 5973 struct ocfs2_cached_dealloc_ctxt *dealloc) 5974 { 5975 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 5976 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) 5977 (fe_bh->b_data + inode->i_sb->s_blocksize - 5978 le16_to_cpu(di->i_xattr_inline_size)); 5979 struct ocfs2_xattr_value_buf vb = { 5980 .vb_bh = fe_bh, 5981 .vb_access = ocfs2_journal_access_di, 5982 }; 5983 5984 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 5985 ref_ci, ref_root_bh, dealloc); 5986 } 5987 5988 struct ocfs2_xattr_tree_value_refcount_para { 5989 struct ocfs2_caching_info *ref_ci; 5990 struct buffer_head *ref_root_bh; 5991 struct ocfs2_cached_dealloc_ctxt *dealloc; 5992 }; 5993 5994 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 
5995 struct ocfs2_xattr_bucket *bucket, 5996 int offset, 5997 struct ocfs2_xattr_value_root **xv, 5998 struct buffer_head **bh) 5999 { 6000 int ret, block_off, name_offset; 6001 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 6002 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6003 void *base; 6004 6005 ret = ocfs2_xattr_bucket_get_name_value(sb, 6006 bucket_xh(bucket), 6007 offset, 6008 &block_off, 6009 &name_offset); 6010 if (ret) { 6011 mlog_errno(ret); 6012 goto out; 6013 } 6014 6015 base = bucket_block(bucket, block_off); 6016 6017 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6018 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6019 6020 if (bh) 6021 *bh = bucket->bu_bhs[block_off]; 6022 out: 6023 return ret; 6024 } 6025 6026 /* 6027 * For a given xattr bucket, refcount all the entries which 6028 * have value stored outside. 6029 */ 6030 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6031 struct ocfs2_xattr_bucket *bucket, 6032 void *para) 6033 { 6034 int i, ret = 0; 6035 struct ocfs2_extent_tree et; 6036 struct ocfs2_xattr_tree_value_refcount_para *ref = 6037 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6038 struct ocfs2_xattr_header *xh = 6039 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6040 struct ocfs2_xattr_entry *xe; 6041 struct ocfs2_xattr_value_buf vb = { 6042 .vb_access = ocfs2_journal_access, 6043 }; 6044 struct ocfs2_post_refcount refcount = { 6045 .credits = bucket->bu_blocks, 6046 .para = bucket, 6047 .func = ocfs2_xattr_bucket_post_refcount, 6048 }; 6049 struct ocfs2_post_refcount *p = NULL; 6050 6051 /* We only need post_refcount if we support metaecc. 
*/ 6052 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6053 p = &refcount; 6054 6055 trace_ocfs2_xattr_bucket_value_refcount( 6056 (unsigned long long)bucket_blkno(bucket), 6057 le16_to_cpu(xh->xh_count)); 6058 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6059 xe = &xh->xh_entries[i]; 6060 6061 if (ocfs2_xattr_is_local(xe)) 6062 continue; 6063 6064 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6065 &vb.vb_xv, &vb.vb_bh); 6066 if (ret) { 6067 mlog_errno(ret); 6068 break; 6069 } 6070 6071 ocfs2_init_xattr_value_extent_tree(&et, 6072 INODE_CACHE(inode), &vb); 6073 6074 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6075 &et, ref->ref_ci, 6076 ref->ref_root_bh, 6077 ref->dealloc, p); 6078 if (ret) { 6079 mlog_errno(ret); 6080 break; 6081 } 6082 } 6083 6084 return ret; 6085 6086 } 6087 6088 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6089 struct buffer_head *root_bh, 6090 u64 blkno, u32 cpos, u32 len, void *para) 6091 { 6092 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6093 ocfs2_xattr_bucket_value_refcount, 6094 para); 6095 } 6096 6097 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6098 struct buffer_head *blk_bh, 6099 struct ocfs2_caching_info *ref_ci, 6100 struct buffer_head *ref_root_bh, 6101 struct ocfs2_cached_dealloc_ctxt *dealloc) 6102 { 6103 int ret = 0; 6104 struct ocfs2_xattr_block *xb = 6105 (struct ocfs2_xattr_block *)blk_bh->b_data; 6106 6107 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6108 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6109 struct ocfs2_xattr_value_buf vb = { 6110 .vb_bh = blk_bh, 6111 .vb_access = ocfs2_journal_access_xb, 6112 }; 6113 6114 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6115 ref_ci, ref_root_bh, 6116 dealloc); 6117 } else { 6118 struct ocfs2_xattr_tree_value_refcount_para para = { 6119 .ref_ci = ref_ci, 6120 .ref_root_bh = ref_root_bh, 6121 .dealloc = dealloc, 6122 }; 6123 6124 ret = 
ocfs2_iterate_xattr_index_block(inode, blk_bh, 6125 ocfs2_refcount_xattr_tree_rec, 6126 ¶); 6127 } 6128 6129 return ret; 6130 } 6131 6132 int ocfs2_xattr_attach_refcount_tree(struct inode *inode, 6133 struct buffer_head *fe_bh, 6134 struct ocfs2_caching_info *ref_ci, 6135 struct buffer_head *ref_root_bh, 6136 struct ocfs2_cached_dealloc_ctxt *dealloc) 6137 { 6138 int ret = 0; 6139 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6140 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6141 struct buffer_head *blk_bh = NULL; 6142 6143 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 6144 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, 6145 ref_ci, ref_root_bh, 6146 dealloc); 6147 if (ret) { 6148 mlog_errno(ret); 6149 goto out; 6150 } 6151 } 6152 6153 if (!di->i_xattr_loc) 6154 goto out; 6155 6156 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 6157 &blk_bh); 6158 if (ret < 0) { 6159 mlog_errno(ret); 6160 goto out; 6161 } 6162 6163 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, 6164 ref_root_bh, dealloc); 6165 if (ret) 6166 mlog_errno(ret); 6167 6168 brelse(blk_bh); 6169 out: 6170 6171 return ret; 6172 } 6173 6174 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); 6175 /* 6176 * Store the information we need in xattr reflink. 6177 * old_bh and new_bh are inode bh for the old and new inode. 6178 */ 6179 struct ocfs2_xattr_reflink { 6180 struct inode *old_inode; 6181 struct inode *new_inode; 6182 struct buffer_head *old_bh; 6183 struct buffer_head *new_bh; 6184 struct ocfs2_caching_info *ref_ci; 6185 struct buffer_head *ref_root_bh; 6186 struct ocfs2_cached_dealloc_ctxt *dealloc; 6187 should_xattr_reflinked *xattr_reflinked; 6188 }; 6189 6190 /* 6191 * Given a xattr header and xe offset, 6192 * return the proper xv and the corresponding bh. 6193 * xattr in inode, block and xattr tree have different implementaions. 
6194 */ 6195 typedef int (get_xattr_value_root)(struct super_block *sb, 6196 struct buffer_head *bh, 6197 struct ocfs2_xattr_header *xh, 6198 int offset, 6199 struct ocfs2_xattr_value_root **xv, 6200 struct buffer_head **ret_bh, 6201 void *para); 6202 6203 /* 6204 * Calculate all the xattr value root metadata stored in this xattr header and 6205 * credits we need if we create them from the scratch. 6206 * We use get_xattr_value_root so that all types of xattr container can use it. 6207 */ 6208 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, 6209 struct buffer_head *bh, 6210 struct ocfs2_xattr_header *xh, 6211 int *metas, int *credits, 6212 int *num_recs, 6213 get_xattr_value_root *func, 6214 void *para) 6215 { 6216 int i, ret = 0; 6217 struct ocfs2_xattr_value_root *xv; 6218 struct ocfs2_xattr_entry *xe; 6219 6220 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6221 xe = &xh->xh_entries[i]; 6222 if (ocfs2_xattr_is_local(xe)) 6223 continue; 6224 6225 ret = func(sb, bh, xh, i, &xv, NULL, para); 6226 if (ret) { 6227 mlog_errno(ret); 6228 break; 6229 } 6230 6231 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * 6232 le16_to_cpu(xv->xr_list.l_next_free_rec); 6233 6234 *credits += ocfs2_calc_extend_credits(sb, 6235 &def_xv.xv.xr_list); 6236 6237 /* 6238 * If the value is a tree with depth > 1, We don't go deep 6239 * to the extent block, so just calculate a maximum record num. 6240 */ 6241 if (!xv->xr_list.l_tree_depth) 6242 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); 6243 else 6244 *num_recs += ocfs2_clusters_for_bytes(sb, 6245 XATTR_SIZE_MAX); 6246 } 6247 6248 return ret; 6249 } 6250 6251 /* Used by xattr inode and block to return the right xv and buffer_head. 
*/ 6252 static int ocfs2_get_xattr_value_root(struct super_block *sb, 6253 struct buffer_head *bh, 6254 struct ocfs2_xattr_header *xh, 6255 int offset, 6256 struct ocfs2_xattr_value_root **xv, 6257 struct buffer_head **ret_bh, 6258 void *para) 6259 { 6260 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6261 6262 *xv = (struct ocfs2_xattr_value_root *)((void *)xh + 6263 le16_to_cpu(xe->xe_name_offset) + 6264 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6265 6266 if (ret_bh) 6267 *ret_bh = bh; 6268 6269 return 0; 6270 } 6271 6272 /* 6273 * Lock the meta_ac and caculate how much credits we need for reflink xattrs. 6274 * It is only used for inline xattr and xattr block. 6275 */ 6276 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, 6277 struct ocfs2_xattr_header *xh, 6278 struct buffer_head *ref_root_bh, 6279 int *credits, 6280 struct ocfs2_alloc_context **meta_ac) 6281 { 6282 int ret, meta_add = 0, num_recs = 0; 6283 struct ocfs2_refcount_block *rb = 6284 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 6285 6286 *credits = 0; 6287 6288 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh, 6289 &meta_add, credits, &num_recs, 6290 ocfs2_get_xattr_value_root, 6291 NULL); 6292 if (ret) { 6293 mlog_errno(ret); 6294 goto out; 6295 } 6296 6297 /* 6298 * We need to add/modify num_recs in refcount tree, so just calculate 6299 * an approximate number we need for refcount tree change. 6300 * Sometimes we need to split the tree, and after split, half recs 6301 * will be moved to the new block, and a new block can only provide 6302 * half number of recs. So we multiple new blocks by 2. 
6303 */ 6304 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6305 meta_add += num_recs; 6306 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6307 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6308 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6309 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6310 else 6311 *credits += 1; 6312 6313 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); 6314 if (ret) 6315 mlog_errno(ret); 6316 6317 out: 6318 return ret; 6319 } 6320 6321 /* 6322 * Given a xattr header, reflink all the xattrs in this container. 6323 * It can be used for inode, block and bucket. 6324 * 6325 * NOTE: 6326 * Before we call this function, the caller has memcpy the xattr in 6327 * old_xh to the new_xh. 6328 * 6329 * If args.xattr_reflinked is set, call it to decide whether the xe should 6330 * be reflinked or not. If not, remove it from the new xattr header. 6331 */ 6332 static int ocfs2_reflink_xattr_header(handle_t *handle, 6333 struct ocfs2_xattr_reflink *args, 6334 struct buffer_head *old_bh, 6335 struct ocfs2_xattr_header *xh, 6336 struct buffer_head *new_bh, 6337 struct ocfs2_xattr_header *new_xh, 6338 struct ocfs2_xattr_value_buf *vb, 6339 struct ocfs2_alloc_context *meta_ac, 6340 get_xattr_value_root *func, 6341 void *para) 6342 { 6343 int ret = 0, i, j; 6344 struct super_block *sb = args->old_inode->i_sb; 6345 struct buffer_head *value_bh; 6346 struct ocfs2_xattr_entry *xe, *last; 6347 struct ocfs2_xattr_value_root *xv, *new_xv; 6348 struct ocfs2_extent_tree data_et; 6349 u32 clusters, cpos, p_cluster, num_clusters; 6350 unsigned int ext_flags = 0; 6351 6352 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr, 6353 le16_to_cpu(xh->xh_count)); 6354 6355 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; 6356 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6357 xe = &xh->xh_entries[i]; 6358 6359 if (args->xattr_reflinked && 
!args->xattr_reflinked(xe)) { 6360 xe = &new_xh->xh_entries[j]; 6361 6362 le16_add_cpu(&new_xh->xh_count, -1); 6363 if (new_xh->xh_count) { 6364 memmove(xe, xe + 1, 6365 (void *)last - (void *)xe); 6366 memset(last, 0, 6367 sizeof(struct ocfs2_xattr_entry)); 6368 } 6369 6370 /* 6371 * We don't want j to increase in the next round since 6372 * it is already moved ahead. 6373 */ 6374 j--; 6375 continue; 6376 } 6377 6378 if (ocfs2_xattr_is_local(xe)) 6379 continue; 6380 6381 ret = func(sb, old_bh, xh, i, &xv, NULL, para); 6382 if (ret) { 6383 mlog_errno(ret); 6384 break; 6385 } 6386 6387 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); 6388 if (ret) { 6389 mlog_errno(ret); 6390 break; 6391 } 6392 6393 /* 6394 * For the xattr which has l_tree_depth = 0, all the extent 6395 * recs have already be copied to the new xh with the 6396 * propriate OCFS2_EXT_REFCOUNTED flag we just need to 6397 * increase the refount count int the refcount tree. 6398 * 6399 * For the xattr which has l_tree_depth > 0, we need 6400 * to initialize it to the empty default value root, 6401 * and then insert the extents one by one. 
6402 */ 6403 if (xv->xr_list.l_tree_depth) { 6404 memcpy(new_xv, &def_xv, sizeof(def_xv)); 6405 vb->vb_xv = new_xv; 6406 vb->vb_bh = value_bh; 6407 ocfs2_init_xattr_value_extent_tree(&data_et, 6408 INODE_CACHE(args->new_inode), vb); 6409 } 6410 6411 clusters = le32_to_cpu(xv->xr_clusters); 6412 cpos = 0; 6413 while (cpos < clusters) { 6414 ret = ocfs2_xattr_get_clusters(args->old_inode, 6415 cpos, 6416 &p_cluster, 6417 &num_clusters, 6418 &xv->xr_list, 6419 &ext_flags); 6420 if (ret) { 6421 mlog_errno(ret); 6422 goto out; 6423 } 6424 6425 BUG_ON(!p_cluster); 6426 6427 if (xv->xr_list.l_tree_depth) { 6428 ret = ocfs2_insert_extent(handle, 6429 &data_et, cpos, 6430 ocfs2_clusters_to_blocks( 6431 args->old_inode->i_sb, 6432 p_cluster), 6433 num_clusters, ext_flags, 6434 meta_ac); 6435 if (ret) { 6436 mlog_errno(ret); 6437 goto out; 6438 } 6439 } 6440 6441 ret = ocfs2_increase_refcount(handle, args->ref_ci, 6442 args->ref_root_bh, 6443 p_cluster, num_clusters, 6444 meta_ac, args->dealloc); 6445 if (ret) { 6446 mlog_errno(ret); 6447 goto out; 6448 } 6449 6450 cpos += num_clusters; 6451 } 6452 } 6453 6454 out: 6455 return ret; 6456 } 6457 6458 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) 6459 { 6460 int ret = 0, credits = 0; 6461 handle_t *handle; 6462 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); 6463 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; 6464 int inline_size = le16_to_cpu(di->i_xattr_inline_size); 6465 int header_off = osb->sb->s_blocksize - inline_size; 6466 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *) 6467 (args->old_bh->b_data + header_off); 6468 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *) 6469 (args->new_bh->b_data + header_off); 6470 struct ocfs2_alloc_context *meta_ac = NULL; 6471 struct ocfs2_inode_info *new_oi; 6472 struct ocfs2_dinode *new_di; 6473 struct ocfs2_xattr_value_buf vb = { 6474 .vb_bh = args->new_bh, 6475 .vb_access = 
ocfs2_journal_access_di, 6476 }; 6477 6478 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6479 &credits, &meta_ac); 6480 if (ret) { 6481 mlog_errno(ret); 6482 goto out; 6483 } 6484 6485 handle = ocfs2_start_trans(osb, credits); 6486 if (IS_ERR(handle)) { 6487 ret = PTR_ERR(handle); 6488 mlog_errno(ret); 6489 goto out; 6490 } 6491 6492 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), 6493 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6494 if (ret) { 6495 mlog_errno(ret); 6496 goto out_commit; 6497 } 6498 6499 memcpy(args->new_bh->b_data + header_off, 6500 args->old_bh->b_data + header_off, inline_size); 6501 6502 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6503 new_di->i_xattr_inline_size = cpu_to_le16(inline_size); 6504 6505 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, 6506 args->new_bh, new_xh, &vb, meta_ac, 6507 ocfs2_get_xattr_value_root, NULL); 6508 if (ret) { 6509 mlog_errno(ret); 6510 goto out_commit; 6511 } 6512 6513 new_oi = OCFS2_I(args->new_inode); 6514 /* 6515 * Adjust extent record count to reserve space for extended attribute. 6516 * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). 
6517 */ 6518 if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && 6519 !(ocfs2_inode_is_fast_symlink(args->new_inode))) { 6520 struct ocfs2_extent_list *el = &new_di->id2.i_list; 6521 le16_add_cpu(&el->l_count, -(inline_size / 6522 sizeof(struct ocfs2_extent_rec))); 6523 } 6524 spin_lock(&new_oi->ip_lock); 6525 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; 6526 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6527 spin_unlock(&new_oi->ip_lock); 6528 6529 ocfs2_journal_dirty(handle, args->new_bh); 6530 6531 out_commit: 6532 ocfs2_commit_trans(osb, handle); 6533 6534 out: 6535 if (meta_ac) 6536 ocfs2_free_alloc_context(meta_ac); 6537 return ret; 6538 } 6539 6540 static int ocfs2_create_empty_xattr_block(struct inode *inode, 6541 struct buffer_head *fe_bh, 6542 struct buffer_head **ret_bh, 6543 int indexed) 6544 { 6545 int ret; 6546 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6547 struct ocfs2_xattr_set_ctxt ctxt; 6548 6549 memset(&ctxt, 0, sizeof(ctxt)); 6550 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); 6551 if (ret < 0) { 6552 mlog_errno(ret); 6553 return ret; 6554 } 6555 6556 ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); 6557 if (IS_ERR(ctxt.handle)) { 6558 ret = PTR_ERR(ctxt.handle); 6559 mlog_errno(ret); 6560 goto out; 6561 } 6562 6563 trace_ocfs2_create_empty_xattr_block( 6564 (unsigned long long)fe_bh->b_blocknr, indexed); 6565 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6566 ret_bh); 6567 if (ret) 6568 mlog_errno(ret); 6569 6570 ocfs2_commit_trans(osb, ctxt.handle); 6571 out: 6572 ocfs2_free_alloc_context(ctxt.meta_ac); 6573 return ret; 6574 } 6575 6576 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args, 6577 struct buffer_head *blk_bh, 6578 struct buffer_head *new_blk_bh) 6579 { 6580 int ret = 0, credits = 0; 6581 handle_t *handle; 6582 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode); 6583 struct ocfs2_dinode *new_di; 
6584 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb); 6585 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 6586 struct ocfs2_xattr_block *xb = 6587 (struct ocfs2_xattr_block *)blk_bh->b_data; 6588 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header; 6589 struct ocfs2_xattr_block *new_xb = 6590 (struct ocfs2_xattr_block *)new_blk_bh->b_data; 6591 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header; 6592 struct ocfs2_alloc_context *meta_ac; 6593 struct ocfs2_xattr_value_buf vb = { 6594 .vb_bh = new_blk_bh, 6595 .vb_access = ocfs2_journal_access_xb, 6596 }; 6597 6598 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6599 &credits, &meta_ac); 6600 if (ret) { 6601 mlog_errno(ret); 6602 return ret; 6603 } 6604 6605 /* One more credits in case we need to add xattr flags in new inode. */ 6606 handle = ocfs2_start_trans(osb, credits + 1); 6607 if (IS_ERR(handle)) { 6608 ret = PTR_ERR(handle); 6609 mlog_errno(ret); 6610 goto out; 6611 } 6612 6613 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6614 ret = ocfs2_journal_access_di(handle, 6615 INODE_CACHE(args->new_inode), 6616 args->new_bh, 6617 OCFS2_JOURNAL_ACCESS_WRITE); 6618 if (ret) { 6619 mlog_errno(ret); 6620 goto out_commit; 6621 } 6622 } 6623 6624 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode), 6625 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6626 if (ret) { 6627 mlog_errno(ret); 6628 goto out_commit; 6629 } 6630 6631 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off, 6632 osb->sb->s_blocksize - header_off); 6633 6634 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh, 6635 new_blk_bh, new_xh, &vb, meta_ac, 6636 ocfs2_get_xattr_value_root, NULL); 6637 if (ret) { 6638 mlog_errno(ret); 6639 goto out_commit; 6640 } 6641 6642 ocfs2_journal_dirty(handle, new_blk_bh); 6643 6644 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6645 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6646 
spin_lock(&new_oi->ip_lock); 6647 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 6648 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6649 spin_unlock(&new_oi->ip_lock); 6650 6651 ocfs2_journal_dirty(handle, args->new_bh); 6652 } 6653 6654 out_commit: 6655 ocfs2_commit_trans(osb, handle); 6656 6657 out: 6658 ocfs2_free_alloc_context(meta_ac); 6659 return ret; 6660 } 6661 6662 struct ocfs2_reflink_xattr_tree_args { 6663 struct ocfs2_xattr_reflink *reflink; 6664 struct buffer_head *old_blk_bh; 6665 struct buffer_head *new_blk_bh; 6666 struct ocfs2_xattr_bucket *old_bucket; 6667 struct ocfs2_xattr_bucket *new_bucket; 6668 }; 6669 6670 /* 6671 * NOTE: 6672 * We have to handle the case that both old bucket and new bucket 6673 * will call this function to get the right ret_bh. 6674 * So The caller must give us the right bh. 6675 */ 6676 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb, 6677 struct buffer_head *bh, 6678 struct ocfs2_xattr_header *xh, 6679 int offset, 6680 struct ocfs2_xattr_value_root **xv, 6681 struct buffer_head **ret_bh, 6682 void *para) 6683 { 6684 struct ocfs2_reflink_xattr_tree_args *args = 6685 (struct ocfs2_reflink_xattr_tree_args *)para; 6686 struct ocfs2_xattr_bucket *bucket; 6687 6688 if (bh == args->old_bucket->bu_bhs[0]) 6689 bucket = args->old_bucket; 6690 else 6691 bucket = args->new_bucket; 6692 6693 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6694 xv, ret_bh); 6695 } 6696 6697 struct ocfs2_value_tree_metas { 6698 int num_metas; 6699 int credits; 6700 int num_recs; 6701 }; 6702 6703 static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb, 6704 struct buffer_head *bh, 6705 struct ocfs2_xattr_header *xh, 6706 int offset, 6707 struct ocfs2_xattr_value_root **xv, 6708 struct buffer_head **ret_bh, 6709 void *para) 6710 { 6711 struct ocfs2_xattr_bucket *bucket = 6712 (struct ocfs2_xattr_bucket *)para; 6713 6714 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6715 xv, 
ret_bh); 6716 } 6717 6718 static int ocfs2_calc_value_tree_metas(struct inode *inode, 6719 struct ocfs2_xattr_bucket *bucket, 6720 void *para) 6721 { 6722 struct ocfs2_value_tree_metas *metas = 6723 (struct ocfs2_value_tree_metas *)para; 6724 struct ocfs2_xattr_header *xh = 6725 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6726 6727 /* Add the credits for this bucket first. */ 6728 metas->credits += bucket->bu_blocks; 6729 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0], 6730 xh, &metas->num_metas, 6731 &metas->credits, &metas->num_recs, 6732 ocfs2_value_tree_metas_in_bucket, 6733 bucket); 6734 } 6735 6736 /* 6737 * Given a xattr extent rec starting from blkno and having len clusters, 6738 * iterate all the buckets calculate how much metadata we need for reflinking 6739 * all the ocfs2_xattr_value_root and lock the allocators accordingly. 6740 */ 6741 static int ocfs2_lock_reflink_xattr_rec_allocators( 6742 struct ocfs2_reflink_xattr_tree_args *args, 6743 struct ocfs2_extent_tree *xt_et, 6744 u64 blkno, u32 len, int *credits, 6745 struct ocfs2_alloc_context **meta_ac, 6746 struct ocfs2_alloc_context **data_ac) 6747 { 6748 int ret, num_free_extents; 6749 struct ocfs2_value_tree_metas metas; 6750 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb); 6751 struct ocfs2_refcount_block *rb; 6752 6753 memset(&metas, 0, sizeof(metas)); 6754 6755 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len, 6756 ocfs2_calc_value_tree_metas, &metas); 6757 if (ret) { 6758 mlog_errno(ret); 6759 goto out; 6760 } 6761 6762 *credits = metas.credits; 6763 6764 /* 6765 * Calculate we need for refcount tree change. 6766 * 6767 * We need to add/modify num_recs in refcount tree, so just calculate 6768 * an approximate number we need for refcount tree change. 6769 * Sometimes we need to split the tree, and after split, half recs 6770 * will be moved to the new block, and a new block can only provide 6771 * half number of recs. 
So we multiple new blocks by 2. 6772 * In the end, we have to add credits for modifying the already 6773 * existed refcount block. 6774 */ 6775 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data; 6776 metas.num_recs = 6777 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) / 6778 ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6779 metas.num_metas += metas.num_recs; 6780 *credits += metas.num_recs + 6781 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6782 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6783 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6784 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6785 else 6786 *credits += 1; 6787 6788 /* count in the xattr tree change. */ 6789 num_free_extents = ocfs2_num_free_extents(osb, xt_et); 6790 if (num_free_extents < 0) { 6791 ret = num_free_extents; 6792 mlog_errno(ret); 6793 goto out; 6794 } 6795 6796 if (num_free_extents < len) 6797 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); 6798 6799 *credits += ocfs2_calc_extend_credits(osb->sb, 6800 xt_et->et_root_el); 6801 6802 if (metas.num_metas) { 6803 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, 6804 meta_ac); 6805 if (ret) { 6806 mlog_errno(ret); 6807 goto out; 6808 } 6809 } 6810 6811 if (len) { 6812 ret = ocfs2_reserve_clusters(osb, len, data_ac); 6813 if (ret) 6814 mlog_errno(ret); 6815 } 6816 out: 6817 if (ret) { 6818 if (*meta_ac) { 6819 ocfs2_free_alloc_context(*meta_ac); 6820 *meta_ac = NULL; 6821 } 6822 } 6823 6824 return ret; 6825 } 6826 6827 static int ocfs2_reflink_xattr_bucket(handle_t *handle, 6828 u64 blkno, u64 new_blkno, u32 clusters, 6829 u32 *cpos, int num_buckets, 6830 struct ocfs2_alloc_context *meta_ac, 6831 struct ocfs2_alloc_context *data_ac, 6832 struct ocfs2_reflink_xattr_tree_args *args) 6833 { 6834 int i, j, ret = 0; 6835 struct super_block *sb = args->reflink->old_inode->i_sb; 6836 int bpb = args->old_bucket->bu_blocks; 6837 struct ocfs2_xattr_value_buf vb = { 6838 
.vb_access = ocfs2_journal_access, 6839 }; 6840 6841 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) { 6842 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6843 if (ret) { 6844 mlog_errno(ret); 6845 break; 6846 } 6847 6848 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1); 6849 if (ret) { 6850 mlog_errno(ret); 6851 break; 6852 } 6853 6854 ret = ocfs2_xattr_bucket_journal_access(handle, 6855 args->new_bucket, 6856 OCFS2_JOURNAL_ACCESS_CREATE); 6857 if (ret) { 6858 mlog_errno(ret); 6859 break; 6860 } 6861 6862 for (j = 0; j < bpb; j++) 6863 memcpy(bucket_block(args->new_bucket, j), 6864 bucket_block(args->old_bucket, j), 6865 sb->s_blocksize); 6866 6867 /* 6868 * Record the start cpos so that we can use it to initialize 6869 * our xattr tree we also set the xh_num_bucket for the new 6870 * bucket. 6871 */ 6872 if (i == 0) { 6873 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> 6874 xh_entries[0].xe_name_hash); 6875 bucket_xh(args->new_bucket)->xh_num_buckets = 6876 cpu_to_le16(num_buckets); 6877 } 6878 6879 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6880 6881 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6882 args->old_bucket->bu_bhs[0], 6883 bucket_xh(args->old_bucket), 6884 args->new_bucket->bu_bhs[0], 6885 bucket_xh(args->new_bucket), 6886 &vb, meta_ac, 6887 ocfs2_get_reflink_xattr_value_root, 6888 args); 6889 if (ret) { 6890 mlog_errno(ret); 6891 break; 6892 } 6893 6894 /* 6895 * Re-access and dirty the bucket to calculate metaecc. 6896 * Because we may extend the transaction in reflink_xattr_header 6897 * which will let the already accessed block gone. 
6898 */ 6899 ret = ocfs2_xattr_bucket_journal_access(handle, 6900 args->new_bucket, 6901 OCFS2_JOURNAL_ACCESS_WRITE); 6902 if (ret) { 6903 mlog_errno(ret); 6904 break; 6905 } 6906 6907 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6908 6909 ocfs2_xattr_bucket_relse(args->old_bucket); 6910 ocfs2_xattr_bucket_relse(args->new_bucket); 6911 } 6912 6913 ocfs2_xattr_bucket_relse(args->old_bucket); 6914 ocfs2_xattr_bucket_relse(args->new_bucket); 6915 return ret; 6916 } 6917 6918 static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6919 struct inode *inode, 6920 struct ocfs2_reflink_xattr_tree_args *args, 6921 struct ocfs2_extent_tree *et, 6922 struct ocfs2_alloc_context *meta_ac, 6923 struct ocfs2_alloc_context *data_ac, 6924 u64 blkno, u32 cpos, u32 len) 6925 { 6926 int ret, first_inserted = 0; 6927 u32 p_cluster, num_clusters, reflink_cpos = 0; 6928 u64 new_blkno; 6929 unsigned int num_buckets, reflink_buckets; 6930 unsigned int bpc = 6931 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 6932 6933 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6934 if (ret) { 6935 mlog_errno(ret); 6936 goto out; 6937 } 6938 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); 6939 ocfs2_xattr_bucket_relse(args->old_bucket); 6940 6941 while (len && num_buckets) { 6942 ret = ocfs2_claim_clusters(handle, data_ac, 6943 1, &p_cluster, &num_clusters); 6944 if (ret) { 6945 mlog_errno(ret); 6946 goto out; 6947 } 6948 6949 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 6950 reflink_buckets = min(num_buckets, bpc * num_clusters); 6951 6952 ret = ocfs2_reflink_xattr_bucket(handle, blkno, 6953 new_blkno, num_clusters, 6954 &reflink_cpos, reflink_buckets, 6955 meta_ac, data_ac, args); 6956 if (ret) { 6957 mlog_errno(ret); 6958 goto out; 6959 } 6960 6961 /* 6962 * For the 1st allocated cluster, we make it use the same cpos 6963 * so that the xattr tree looks the same as the original one 6964 * in the most case. 
6965 */ 6966 if (!first_inserted) { 6967 reflink_cpos = cpos; 6968 first_inserted = 1; 6969 } 6970 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 6971 num_clusters, 0, meta_ac); 6972 if (ret) 6973 mlog_errno(ret); 6974 6975 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno, 6976 num_clusters, reflink_cpos); 6977 6978 len -= num_clusters; 6979 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6980 num_buckets -= reflink_buckets; 6981 } 6982 out: 6983 return ret; 6984 } 6985 6986 /* 6987 * Create the same xattr extent record in the new inode's xattr tree. 6988 */ 6989 static int ocfs2_reflink_xattr_rec(struct inode *inode, 6990 struct buffer_head *root_bh, 6991 u64 blkno, 6992 u32 cpos, 6993 u32 len, 6994 void *para) 6995 { 6996 int ret, credits = 0; 6997 handle_t *handle; 6998 struct ocfs2_reflink_xattr_tree_args *args = 6999 (struct ocfs2_reflink_xattr_tree_args *)para; 7000 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7001 struct ocfs2_alloc_context *meta_ac = NULL; 7002 struct ocfs2_alloc_context *data_ac = NULL; 7003 struct ocfs2_extent_tree et; 7004 7005 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len); 7006 7007 ocfs2_init_xattr_tree_extent_tree(&et, 7008 INODE_CACHE(args->reflink->new_inode), 7009 args->new_blk_bh); 7010 7011 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 7012 len, &credits, 7013 &meta_ac, &data_ac); 7014 if (ret) { 7015 mlog_errno(ret); 7016 goto out; 7017 } 7018 7019 handle = ocfs2_start_trans(osb, credits); 7020 if (IS_ERR(handle)) { 7021 ret = PTR_ERR(handle); 7022 mlog_errno(ret); 7023 goto out; 7024 } 7025 7026 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7027 meta_ac, data_ac, 7028 blkno, cpos, len); 7029 if (ret) 7030 mlog_errno(ret); 7031 7032 ocfs2_commit_trans(osb, handle); 7033 7034 out: 7035 if (meta_ac) 7036 ocfs2_free_alloc_context(meta_ac); 7037 if (data_ac) 7038 ocfs2_free_alloc_context(data_ac); 7039 return ret; 7040 } 7041 7042 /* 
7043 * Create reflinked xattr buckets. 7044 * We will add bucket one by one, and refcount all the xattrs in the bucket 7045 * if they are stored outside. 7046 */ 7047 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7048 struct buffer_head *blk_bh, 7049 struct buffer_head *new_blk_bh) 7050 { 7051 int ret; 7052 struct ocfs2_reflink_xattr_tree_args para; 7053 7054 memset(¶, 0, sizeof(para)); 7055 para.reflink = args; 7056 para.old_blk_bh = blk_bh; 7057 para.new_blk_bh = new_blk_bh; 7058 7059 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7060 if (!para.old_bucket) { 7061 mlog_errno(-ENOMEM); 7062 return -ENOMEM; 7063 } 7064 7065 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7066 if (!para.new_bucket) { 7067 ret = -ENOMEM; 7068 mlog_errno(ret); 7069 goto out; 7070 } 7071 7072 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7073 ocfs2_reflink_xattr_rec, 7074 ¶); 7075 if (ret) 7076 mlog_errno(ret); 7077 7078 out: 7079 ocfs2_xattr_bucket_free(para.old_bucket); 7080 ocfs2_xattr_bucket_free(para.new_bucket); 7081 return ret; 7082 } 7083 7084 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7085 struct buffer_head *blk_bh) 7086 { 7087 int ret, indexed = 0; 7088 struct buffer_head *new_blk_bh = NULL; 7089 struct ocfs2_xattr_block *xb = 7090 (struct ocfs2_xattr_block *)blk_bh->b_data; 7091 7092 7093 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7094 indexed = 1; 7095 7096 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7097 &new_blk_bh, indexed); 7098 if (ret) { 7099 mlog_errno(ret); 7100 goto out; 7101 } 7102 7103 if (!indexed) 7104 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7105 else 7106 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7107 if (ret) 7108 mlog_errno(ret); 7109 7110 out: 7111 brelse(new_blk_bh); 7112 return ret; 7113 } 7114 7115 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7116 { 7117 int type = 
ocfs2_xattr_get_type(xe); 7118 7119 return type != OCFS2_XATTR_INDEX_SECURITY && 7120 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && 7121 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; 7122 } 7123 7124 int ocfs2_reflink_xattrs(struct inode *old_inode, 7125 struct buffer_head *old_bh, 7126 struct inode *new_inode, 7127 struct buffer_head *new_bh, 7128 bool preserve_security) 7129 { 7130 int ret; 7131 struct ocfs2_xattr_reflink args; 7132 struct ocfs2_inode_info *oi = OCFS2_I(old_inode); 7133 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; 7134 struct buffer_head *blk_bh = NULL; 7135 struct ocfs2_cached_dealloc_ctxt dealloc; 7136 struct ocfs2_refcount_tree *ref_tree; 7137 struct buffer_head *ref_root_bh = NULL; 7138 7139 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7140 le64_to_cpu(di->i_refcount_loc), 7141 1, &ref_tree, &ref_root_bh); 7142 if (ret) { 7143 mlog_errno(ret); 7144 goto out; 7145 } 7146 7147 ocfs2_init_dealloc_ctxt(&dealloc); 7148 7149 args.old_inode = old_inode; 7150 args.new_inode = new_inode; 7151 args.old_bh = old_bh; 7152 args.new_bh = new_bh; 7153 args.ref_ci = &ref_tree->rf_ci; 7154 args.ref_root_bh = ref_root_bh; 7155 args.dealloc = &dealloc; 7156 if (preserve_security) 7157 args.xattr_reflinked = NULL; 7158 else 7159 args.xattr_reflinked = ocfs2_reflink_xattr_no_security; 7160 7161 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 7162 ret = ocfs2_reflink_xattr_inline(&args); 7163 if (ret) { 7164 mlog_errno(ret); 7165 goto out_unlock; 7166 } 7167 } 7168 7169 if (!di->i_xattr_loc) 7170 goto out_unlock; 7171 7172 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), 7173 &blk_bh); 7174 if (ret < 0) { 7175 mlog_errno(ret); 7176 goto out_unlock; 7177 } 7178 7179 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); 7180 if (ret) 7181 mlog_errno(ret); 7182 7183 brelse(blk_bh); 7184 7185 out_unlock: 7186 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7187 ref_tree, 1); 7188 brelse(ref_root_bh); 7189 7190 
if (ocfs2_dealloc_has_cluster(&dealloc)) { 7191 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); 7192 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); 7193 } 7194 7195 out: 7196 return ret; 7197 } 7198 7199 /* 7200 * Initialize security and acl for a already created inode. 7201 * Used for reflink a non-preserve-security file. 7202 * 7203 * It uses common api like ocfs2_xattr_set, so the caller 7204 * must not hold any lock expect i_mutex. 7205 */ 7206 int ocfs2_init_security_and_acl(struct inode *dir, 7207 struct inode *inode, 7208 const struct qstr *qstr, 7209 struct posix_acl *default_acl, 7210 struct posix_acl *acl) 7211 { 7212 struct buffer_head *dir_bh = NULL; 7213 int ret = 0; 7214 7215 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7216 if (ret) { 7217 mlog_errno(ret); 7218 goto leave; 7219 } 7220 7221 ret = ocfs2_inode_lock(dir, &dir_bh, 0); 7222 if (ret) { 7223 mlog_errno(ret); 7224 goto leave; 7225 } 7226 7227 if (!ret && default_acl) 7228 ret = ocfs2_iop_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); 7229 if (!ret && acl) 7230 ret = ocfs2_iop_set_acl(inode, acl, ACL_TYPE_ACCESS); 7231 7232 ocfs2_inode_unlock(dir, 0); 7233 brelse(dir_bh); 7234 leave: 7235 return ret; 7236 } 7237 /* 7238 * 'security' attributes support 7239 */ 7240 static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list, 7241 size_t list_size, const char *name, 7242 size_t name_len, int type) 7243 { 7244 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 7245 const size_t total_len = prefix_len + name_len + 1; 7246 7247 if (list && total_len <= list_size) { 7248 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); 7249 memcpy(list + prefix_len, name, name_len); 7250 list[prefix_len + name_len] = '\0'; 7251 } 7252 return total_len; 7253 } 7254 7255 static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name, 7256 void *buffer, size_t size, int type) 7257 { 7258 if (strcmp(name, "") == 0) 7259 return -EINVAL; 7260 return 
ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY, 7261 name, buffer, size); 7262 } 7263 7264 static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name, 7265 const void *value, size_t size, int flags, int type) 7266 { 7267 if (strcmp(name, "") == 0) 7268 return -EINVAL; 7269 7270 return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY, 7271 name, value, size, flags); 7272 } 7273 7274 int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 7275 void *fs_info) 7276 { 7277 const struct xattr *xattr; 7278 int err = 0; 7279 7280 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 7281 err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7282 xattr->name, xattr->value, 7283 xattr->value_len, XATTR_CREATE); 7284 if (err) 7285 break; 7286 } 7287 return err; 7288 } 7289 7290 int ocfs2_init_security_get(struct inode *inode, 7291 struct inode *dir, 7292 const struct qstr *qstr, 7293 struct ocfs2_security_xattr_info *si) 7294 { 7295 /* check whether ocfs2 support feature xattr */ 7296 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7297 return -EOPNOTSUPP; 7298 if (si) 7299 return security_old_inode_init_security(inode, dir, qstr, 7300 &si->name, &si->value, 7301 &si->value_len); 7302 7303 return security_inode_init_security(inode, dir, qstr, 7304 &ocfs2_initxattrs, NULL); 7305 } 7306 7307 int ocfs2_init_security_set(handle_t *handle, 7308 struct inode *inode, 7309 struct buffer_head *di_bh, 7310 struct ocfs2_security_xattr_info *si, 7311 struct ocfs2_alloc_context *xattr_ac, 7312 struct ocfs2_alloc_context *data_ac) 7313 { 7314 return ocfs2_xattr_set_handle(handle, inode, di_bh, 7315 OCFS2_XATTR_INDEX_SECURITY, 7316 si->name, si->value, si->value_len, 0, 7317 xattr_ac, data_ac); 7318 } 7319 7320 const struct xattr_handler ocfs2_xattr_security_handler = { 7321 .prefix = XATTR_SECURITY_PREFIX, 7322 .list = ocfs2_xattr_security_list, 7323 .get = ocfs2_xattr_security_get, 7324 .set = ocfs2_xattr_security_set, 
7325 }; 7326 7327 /* 7328 * 'trusted' attributes support 7329 */ 7330 static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list, 7331 size_t list_size, const char *name, 7332 size_t name_len, int type) 7333 { 7334 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 7335 const size_t total_len = prefix_len + name_len + 1; 7336 7337 if (list && total_len <= list_size) { 7338 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); 7339 memcpy(list + prefix_len, name, name_len); 7340 list[prefix_len + name_len] = '\0'; 7341 } 7342 return total_len; 7343 } 7344 7345 static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name, 7346 void *buffer, size_t size, int type) 7347 { 7348 if (strcmp(name, "") == 0) 7349 return -EINVAL; 7350 return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED, 7351 name, buffer, size); 7352 } 7353 7354 static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name, 7355 const void *value, size_t size, int flags, int type) 7356 { 7357 if (strcmp(name, "") == 0) 7358 return -EINVAL; 7359 7360 return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED, 7361 name, value, size, flags); 7362 } 7363 7364 const struct xattr_handler ocfs2_xattr_trusted_handler = { 7365 .prefix = XATTR_TRUSTED_PREFIX, 7366 .list = ocfs2_xattr_trusted_list, 7367 .get = ocfs2_xattr_trusted_get, 7368 .set = ocfs2_xattr_trusted_set, 7369 }; 7370 7371 /* 7372 * 'user' attributes support 7373 */ 7374 static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list, 7375 size_t list_size, const char *name, 7376 size_t name_len, int type) 7377 { 7378 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 7379 const size_t total_len = prefix_len + name_len + 1; 7380 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7381 7382 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7383 return 0; 7384 7385 if (list && total_len <= list_size) { 7386 memcpy(list, XATTR_USER_PREFIX, prefix_len); 7387 memcpy(list + prefix_len, name, name_len); 
7388 list[prefix_len + name_len] = '\0'; 7389 } 7390 return total_len; 7391 } 7392 7393 static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name, 7394 void *buffer, size_t size, int type) 7395 { 7396 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7397 7398 if (strcmp(name, "") == 0) 7399 return -EINVAL; 7400 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7401 return -EOPNOTSUPP; 7402 return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_USER, name, 7403 buffer, size); 7404 } 7405 7406 static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name, 7407 const void *value, size_t size, int flags, int type) 7408 { 7409 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7410 7411 if (strcmp(name, "") == 0) 7412 return -EINVAL; 7413 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7414 return -EOPNOTSUPP; 7415 7416 return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_USER, 7417 name, value, size, flags); 7418 } 7419 7420 const struct xattr_handler ocfs2_xattr_user_handler = { 7421 .prefix = XATTR_USER_PREFIX, 7422 .list = ocfs2_xattr_user_list, 7423 .get = ocfs2_xattr_user_get, 7424 .set = ocfs2_xattr_user_set, 7425 }; 7426