1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * xattr.c 5 * 6 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 7 * 8 * CREDITS: 9 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public 14 * License version 2 as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 */ 21 22 #include <linux/capability.h> 23 #include <linux/fs.h> 24 #include <linux/types.h> 25 #include <linux/slab.h> 26 #include <linux/highmem.h> 27 #include <linux/pagemap.h> 28 #include <linux/uio.h> 29 #include <linux/sched.h> 30 #include <linux/splice.h> 31 #include <linux/mount.h> 32 #include <linux/writeback.h> 33 #include <linux/falloc.h> 34 #include <linux/sort.h> 35 #include <linux/init.h> 36 #include <linux/module.h> 37 #include <linux/string.h> 38 #include <linux/security.h> 39 40 #include <cluster/masklog.h> 41 42 #include "ocfs2.h" 43 #include "alloc.h" 44 #include "blockcheck.h" 45 #include "dlmglue.h" 46 #include "file.h" 47 #include "symlink.h" 48 #include "sysfile.h" 49 #include "inode.h" 50 #include "journal.h" 51 #include "ocfs2_fs.h" 52 #include "suballoc.h" 53 #include "uptodate.h" 54 #include "buffer_head_io.h" 55 #include "super.h" 56 #include "xattr.h" 57 #include "refcounttree.h" 58 #include "acl.h" 59 #include "ocfs2_trace.h" 60 61 struct ocfs2_xattr_def_value_root { 62 struct ocfs2_xattr_value_root xv; 63 struct ocfs2_extent_rec er; 64 }; 65 66 struct ocfs2_xattr_bucket { 67 /* The inode these xattrs are associated with */ 68 struct inode *bu_inode; 69 70 /* The actual buffers that 
make up the bucket */ 71 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 72 73 /* How many blocks make up one bucket for this filesystem */ 74 int bu_blocks; 75 }; 76 77 struct ocfs2_xattr_set_ctxt { 78 handle_t *handle; 79 struct ocfs2_alloc_context *meta_ac; 80 struct ocfs2_alloc_context *data_ac; 81 struct ocfs2_cached_dealloc_ctxt dealloc; 82 int set_abort; 83 }; 84 85 #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 86 #define OCFS2_XATTR_INLINE_SIZE 80 87 #define OCFS2_XATTR_HEADER_GAP 4 88 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 89 - sizeof(struct ocfs2_xattr_header) \ 90 - OCFS2_XATTR_HEADER_GAP) 91 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 92 - sizeof(struct ocfs2_xattr_block) \ 93 - sizeof(struct ocfs2_xattr_header) \ 94 - OCFS2_XATTR_HEADER_GAP) 95 96 static struct ocfs2_xattr_def_value_root def_xv = { 97 .xv.xr_list.l_count = cpu_to_le16(1), 98 }; 99 100 const struct xattr_handler *ocfs2_xattr_handlers[] = { 101 &ocfs2_xattr_user_handler, 102 &posix_acl_access_xattr_handler, 103 &posix_acl_default_xattr_handler, 104 &ocfs2_xattr_trusted_handler, 105 &ocfs2_xattr_security_handler, 106 NULL 107 }; 108 109 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 110 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 111 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 112 = &posix_acl_access_xattr_handler, 113 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 114 = &posix_acl_default_xattr_handler, 115 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 116 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 117 }; 118 119 struct ocfs2_xattr_info { 120 int xi_name_index; 121 const char *xi_name; 122 int xi_name_len; 123 const void *xi_value; 124 size_t xi_value_len; 125 }; 126 127 struct ocfs2_xattr_search { 128 struct buffer_head *inode_bh; 129 /* 130 * xattr_bh point to the block buffer head which has extended attribute 131 * when extended 
attribute in inode, xattr_bh is equal to inode_bh. 132 */ 133 struct buffer_head *xattr_bh; 134 struct ocfs2_xattr_header *header; 135 struct ocfs2_xattr_bucket *bucket; 136 void *base; 137 void *end; 138 struct ocfs2_xattr_entry *here; 139 int not_found; 140 }; 141 142 /* Operations on struct ocfs2_xa_entry */ 143 struct ocfs2_xa_loc; 144 struct ocfs2_xa_loc_operations { 145 /* 146 * Journal functions 147 */ 148 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 149 int type); 150 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 151 152 /* 153 * Return a pointer to the appropriate buffer in loc->xl_storage 154 * at the given offset from loc->xl_header. 155 */ 156 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 157 158 /* Can we reuse the existing entry for the new value? */ 159 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 160 struct ocfs2_xattr_info *xi); 161 162 /* How much space is needed for the new value? */ 163 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 164 struct ocfs2_xattr_info *xi); 165 166 /* 167 * Return the offset of the first name+value pair. This is 168 * the start of our downward-filling free space. 169 */ 170 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 171 172 /* 173 * Remove the name+value at this location. Do whatever is 174 * appropriate with the remaining name+value pairs. 175 */ 176 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 177 178 /* Fill xl_entry with a new entry */ 179 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 180 181 /* Add name+value storage to an entry */ 182 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 183 184 /* 185 * Initialize the value buf's access and bh fields for this entry. 186 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 187 */ 188 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 189 struct ocfs2_xattr_value_buf *vb); 190 }; 191 192 /* 193 * Describes an xattr entry location. 
This is a memory structure 194 * tracking the on-disk structure. 195 */ 196 struct ocfs2_xa_loc { 197 /* This xattr belongs to this inode */ 198 struct inode *xl_inode; 199 200 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 201 struct ocfs2_xattr_header *xl_header; 202 203 /* Bytes from xl_header to the end of the storage */ 204 int xl_size; 205 206 /* 207 * The ocfs2_xattr_entry this location describes. If this is 208 * NULL, this location describes the on-disk structure where it 209 * would have been. 210 */ 211 struct ocfs2_xattr_entry *xl_entry; 212 213 /* 214 * Internal housekeeping 215 */ 216 217 /* Buffer(s) containing this entry */ 218 void *xl_storage; 219 220 /* Operations on the storage backing this location */ 221 const struct ocfs2_xa_loc_operations *xl_ops; 222 }; 223 224 /* 225 * Convenience functions to calculate how much space is needed for a 226 * given name+value pair 227 */ 228 static int namevalue_size(int name_len, uint64_t value_len) 229 { 230 if (value_len > OCFS2_XATTR_INLINE_SIZE) 231 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 232 else 233 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 234 } 235 236 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 237 { 238 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 239 } 240 241 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 242 { 243 u64 value_len = le64_to_cpu(xe->xe_value_size); 244 245 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 246 ocfs2_xattr_is_local(xe)); 247 return namevalue_size(xe->xe_name_len, value_len); 248 } 249 250 251 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 252 struct ocfs2_xattr_header *xh, 253 int index, 254 int *block_off, 255 int *new_offset); 256 257 static int ocfs2_xattr_block_find(struct inode *inode, 258 int name_index, 259 const char *name, 260 struct ocfs2_xattr_search *xs); 261 static int ocfs2_xattr_index_block_find(struct inode *inode, 262 struct 
buffer_head *root_bh, 263 int name_index, 264 const char *name, 265 struct ocfs2_xattr_search *xs); 266 267 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 268 struct buffer_head *blk_bh, 269 char *buffer, 270 size_t buffer_size); 271 272 static int ocfs2_xattr_create_index_block(struct inode *inode, 273 struct ocfs2_xattr_search *xs, 274 struct ocfs2_xattr_set_ctxt *ctxt); 275 276 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 277 struct ocfs2_xattr_info *xi, 278 struct ocfs2_xattr_search *xs, 279 struct ocfs2_xattr_set_ctxt *ctxt); 280 281 typedef int (xattr_tree_rec_func)(struct inode *inode, 282 struct buffer_head *root_bh, 283 u64 blkno, u32 cpos, u32 len, void *para); 284 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 285 struct buffer_head *root_bh, 286 xattr_tree_rec_func *rec_func, 287 void *para); 288 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 289 struct ocfs2_xattr_bucket *bucket, 290 void *para); 291 static int ocfs2_rm_xattr_cluster(struct inode *inode, 292 struct buffer_head *root_bh, 293 u64 blkno, 294 u32 cpos, 295 u32 len, 296 void *para); 297 298 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 299 u64 src_blk, u64 last_blk, u64 to_blk, 300 unsigned int start_bucket, 301 u32 *first_hash); 302 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 303 struct ocfs2_dinode *di, 304 struct ocfs2_xattr_info *xi, 305 struct ocfs2_xattr_search *xis, 306 struct ocfs2_xattr_search *xbs, 307 struct ocfs2_refcount_tree **ref_tree, 308 int *meta_need, 309 int *credits); 310 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 311 struct ocfs2_xattr_bucket *bucket, 312 int offset, 313 struct ocfs2_xattr_value_root **xv, 314 struct buffer_head **bh); 315 316 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 317 { 318 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 319 } 320 321 static inline u16 
ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 322 { 323 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 324 } 325 326 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 327 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 328 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 329 330 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 331 { 332 struct ocfs2_xattr_bucket *bucket; 333 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 334 335 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 336 337 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); 338 if (bucket) { 339 bucket->bu_inode = inode; 340 bucket->bu_blocks = blks; 341 } 342 343 return bucket; 344 } 345 346 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 347 { 348 int i; 349 350 for (i = 0; i < bucket->bu_blocks; i++) { 351 brelse(bucket->bu_bhs[i]); 352 bucket->bu_bhs[i] = NULL; 353 } 354 } 355 356 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 357 { 358 if (bucket) { 359 ocfs2_xattr_bucket_relse(bucket); 360 bucket->bu_inode = NULL; 361 kfree(bucket); 362 } 363 } 364 365 /* 366 * A bucket that has never been written to disk doesn't need to be 367 * read. We just need the buffer_heads. Don't call this for 368 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 369 * them fully. 
370 */ 371 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 372 u64 xb_blkno, int new) 373 { 374 int i, rc = 0; 375 376 for (i = 0; i < bucket->bu_blocks; i++) { 377 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 378 xb_blkno + i); 379 if (!bucket->bu_bhs[i]) { 380 rc = -ENOMEM; 381 mlog_errno(rc); 382 break; 383 } 384 385 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 386 bucket->bu_bhs[i])) { 387 if (new) 388 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 389 bucket->bu_bhs[i]); 390 else { 391 set_buffer_uptodate(bucket->bu_bhs[i]); 392 ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 393 bucket->bu_bhs[i]); 394 } 395 } 396 } 397 398 if (rc) 399 ocfs2_xattr_bucket_relse(bucket); 400 return rc; 401 } 402 403 /* Read the xattr bucket at xb_blkno */ 404 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 405 u64 xb_blkno) 406 { 407 int rc; 408 409 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 410 bucket->bu_blocks, bucket->bu_bhs, 0, 411 NULL); 412 if (!rc) { 413 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 414 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 415 bucket->bu_bhs, 416 bucket->bu_blocks, 417 &bucket_xh(bucket)->xh_check); 418 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 419 if (rc) 420 mlog_errno(rc); 421 } 422 423 if (rc) 424 ocfs2_xattr_bucket_relse(bucket); 425 return rc; 426 } 427 428 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 429 struct ocfs2_xattr_bucket *bucket, 430 int type) 431 { 432 int i, rc = 0; 433 434 for (i = 0; i < bucket->bu_blocks; i++) { 435 rc = ocfs2_journal_access(handle, 436 INODE_CACHE(bucket->bu_inode), 437 bucket->bu_bhs[i], type); 438 if (rc) { 439 mlog_errno(rc); 440 break; 441 } 442 } 443 444 return rc; 445 } 446 447 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 448 struct ocfs2_xattr_bucket *bucket) 449 { 450 int i; 451 452 
spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 453 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 454 bucket->bu_bhs, bucket->bu_blocks, 455 &bucket_xh(bucket)->xh_check); 456 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 457 458 for (i = 0; i < bucket->bu_blocks; i++) 459 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 460 } 461 462 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 463 struct ocfs2_xattr_bucket *src) 464 { 465 int i; 466 int blocksize = src->bu_inode->i_sb->s_blocksize; 467 468 BUG_ON(dest->bu_blocks != src->bu_blocks); 469 BUG_ON(dest->bu_inode != src->bu_inode); 470 471 for (i = 0; i < src->bu_blocks; i++) { 472 memcpy(bucket_block(dest, i), bucket_block(src, i), 473 blocksize); 474 } 475 } 476 477 static int ocfs2_validate_xattr_block(struct super_block *sb, 478 struct buffer_head *bh) 479 { 480 int rc; 481 struct ocfs2_xattr_block *xb = 482 (struct ocfs2_xattr_block *)bh->b_data; 483 484 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); 485 486 BUG_ON(!buffer_uptodate(bh)); 487 488 /* 489 * If the ecc fails, we return the error but otherwise 490 * leave the filesystem running. We know any error is 491 * local to this block. 
492 */ 493 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 494 if (rc) 495 return rc; 496 497 /* 498 * Errors after here are fatal 499 */ 500 501 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 502 return ocfs2_error(sb, 503 "Extended attribute block #%llu has bad " 504 "signature %.*s", 505 (unsigned long long)bh->b_blocknr, 7, 506 xb->xb_signature); 507 } 508 509 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 510 return ocfs2_error(sb, 511 "Extended attribute block #%llu has an " 512 "invalid xb_blkno of %llu", 513 (unsigned long long)bh->b_blocknr, 514 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 515 } 516 517 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 518 return ocfs2_error(sb, 519 "Extended attribute block #%llu has an invalid " 520 "xb_fs_generation of #%u", 521 (unsigned long long)bh->b_blocknr, 522 le32_to_cpu(xb->xb_fs_generation)); 523 } 524 525 return 0; 526 } 527 528 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 529 struct buffer_head **bh) 530 { 531 int rc; 532 struct buffer_head *tmp = *bh; 533 534 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 535 ocfs2_validate_xattr_block); 536 537 /* If ocfs2_read_block() got us a new bh, pass it up. */ 538 if (!rc && !*bh) 539 *bh = tmp; 540 541 return rc; 542 } 543 544 static inline const char *ocfs2_xattr_prefix(int name_index) 545 { 546 const struct xattr_handler *handler = NULL; 547 548 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 549 handler = ocfs2_xattr_handler_map[name_index]; 550 551 return handler ? 
handler->prefix : NULL; 552 } 553 554 static u32 ocfs2_xattr_name_hash(struct inode *inode, 555 const char *name, 556 int name_len) 557 { 558 /* Get hash value of uuid from super block */ 559 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 560 int i; 561 562 /* hash extended attribute name */ 563 for (i = 0; i < name_len; i++) { 564 hash = (hash << OCFS2_HASH_SHIFT) ^ 565 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 566 *name++; 567 } 568 569 return hash; 570 } 571 572 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 573 { 574 return namevalue_size(name_len, value_len) + 575 sizeof(struct ocfs2_xattr_entry); 576 } 577 578 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 579 { 580 return namevalue_size_xi(xi) + 581 sizeof(struct ocfs2_xattr_entry); 582 } 583 584 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 585 { 586 return namevalue_size_xe(xe) + 587 sizeof(struct ocfs2_xattr_entry); 588 } 589 590 int ocfs2_calc_security_init(struct inode *dir, 591 struct ocfs2_security_xattr_info *si, 592 int *want_clusters, 593 int *xattr_credits, 594 struct ocfs2_alloc_context **xattr_ac) 595 { 596 int ret = 0; 597 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 598 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 599 si->value_len); 600 601 /* 602 * The max space of security xattr taken inline is 603 * 256(name) + 80(value) + 16(entry) = 352 bytes, 604 * So reserve one metadata block for it is ok. 
605 */ 606 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 607 s_size > OCFS2_XATTR_FREE_IN_IBODY) { 608 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 609 if (ret) { 610 mlog_errno(ret); 611 return ret; 612 } 613 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 614 } 615 616 /* reserve clusters for xattr value which will be set in B tree*/ 617 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { 618 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 619 si->value_len); 620 621 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 622 new_clusters); 623 *want_clusters += new_clusters; 624 } 625 return ret; 626 } 627 628 int ocfs2_calc_xattr_init(struct inode *dir, 629 struct buffer_head *dir_bh, 630 umode_t mode, 631 struct ocfs2_security_xattr_info *si, 632 int *want_clusters, 633 int *xattr_credits, 634 int *want_meta) 635 { 636 int ret = 0; 637 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 638 int s_size = 0, a_size = 0, acl_len = 0, new_clusters; 639 640 if (si->enable) 641 s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 642 si->value_len); 643 644 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 645 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 646 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 647 "", NULL, 0); 648 if (acl_len > 0) { 649 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 650 if (S_ISDIR(mode)) 651 a_size <<= 1; 652 } else if (acl_len != 0 && acl_len != -ENODATA) { 653 mlog_errno(ret); 654 return ret; 655 } 656 } 657 658 if (!(s_size + a_size)) 659 return ret; 660 661 /* 662 * The max space of security xattr taken inline is 663 * 256(name) + 80(value) + 16(entry) = 352 bytes, 664 * The max space of acl xattr taken inline is 665 * 80(value) + 16(entry) * 2(if directory) = 192 bytes, 666 * when blocksize = 512, may reserve one more cluser for 667 * xattr bucket, otherwise reserve one metadata block 668 * for them is ok. 
669 * If this is a new directory with inline data, 670 * we choose to reserve the entire inline area for 671 * directory contents and force an external xattr block. 672 */ 673 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 674 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || 675 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 676 *want_meta = *want_meta + 1; 677 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 678 } 679 680 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && 681 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { 682 *want_clusters += 1; 683 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); 684 } 685 686 /* 687 * reserve credits and clusters for xattrs which has large value 688 * and have to be set outside 689 */ 690 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { 691 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 692 si->value_len); 693 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 694 new_clusters); 695 *want_clusters += new_clusters; 696 } 697 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && 698 acl_len > OCFS2_XATTR_INLINE_SIZE) { 699 /* for directory, it has DEFAULT and ACCESS two types of acls */ 700 new_clusters = (S_ISDIR(mode) ? 
2 : 1) * 701 ocfs2_clusters_for_bytes(dir->i_sb, acl_len); 702 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 703 new_clusters); 704 *want_clusters += new_clusters; 705 } 706 707 return ret; 708 } 709 710 static int ocfs2_xattr_extend_allocation(struct inode *inode, 711 u32 clusters_to_add, 712 struct ocfs2_xattr_value_buf *vb, 713 struct ocfs2_xattr_set_ctxt *ctxt) 714 { 715 int status = 0, credits; 716 handle_t *handle = ctxt->handle; 717 enum ocfs2_alloc_restarted why; 718 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 719 struct ocfs2_extent_tree et; 720 721 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 722 723 while (clusters_to_add) { 724 trace_ocfs2_xattr_extend_allocation(clusters_to_add); 725 726 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 727 OCFS2_JOURNAL_ACCESS_WRITE); 728 if (status < 0) { 729 mlog_errno(status); 730 break; 731 } 732 733 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 734 status = ocfs2_add_clusters_in_btree(handle, 735 &et, 736 &logical_start, 737 clusters_to_add, 738 0, 739 ctxt->data_ac, 740 ctxt->meta_ac, 741 &why); 742 if ((status < 0) && (status != -EAGAIN)) { 743 if (status != -ENOSPC) 744 mlog_errno(status); 745 break; 746 } 747 748 ocfs2_journal_dirty(handle, vb->vb_bh); 749 750 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - 751 prev_clusters; 752 753 if (why != RESTART_NONE && clusters_to_add) { 754 /* 755 * We can only fail in case the alloc file doesn't give 756 * up enough clusters. 
757 */ 758 BUG_ON(why == RESTART_META); 759 760 credits = ocfs2_calc_extend_credits(inode->i_sb, 761 &vb->vb_xv->xr_list); 762 status = ocfs2_extend_trans(handle, credits); 763 if (status < 0) { 764 status = -ENOMEM; 765 mlog_errno(status); 766 break; 767 } 768 } 769 } 770 771 return status; 772 } 773 774 static int __ocfs2_remove_xattr_range(struct inode *inode, 775 struct ocfs2_xattr_value_buf *vb, 776 u32 cpos, u32 phys_cpos, u32 len, 777 unsigned int ext_flags, 778 struct ocfs2_xattr_set_ctxt *ctxt) 779 { 780 int ret; 781 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 782 handle_t *handle = ctxt->handle; 783 struct ocfs2_extent_tree et; 784 785 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 786 787 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 788 OCFS2_JOURNAL_ACCESS_WRITE); 789 if (ret) { 790 mlog_errno(ret); 791 goto out; 792 } 793 794 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, 795 &ctxt->dealloc); 796 if (ret) { 797 mlog_errno(ret); 798 goto out; 799 } 800 801 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 802 ocfs2_journal_dirty(handle, vb->vb_bh); 803 804 if (ext_flags & OCFS2_EXT_REFCOUNTED) 805 ret = ocfs2_decrease_refcount(inode, handle, 806 ocfs2_blocks_to_clusters(inode->i_sb, 807 phys_blkno), 808 len, ctxt->meta_ac, &ctxt->dealloc, 1); 809 else 810 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, 811 phys_blkno, len); 812 if (ret) 813 mlog_errno(ret); 814 815 out: 816 return ret; 817 } 818 819 static int ocfs2_xattr_shrink_size(struct inode *inode, 820 u32 old_clusters, 821 u32 new_clusters, 822 struct ocfs2_xattr_value_buf *vb, 823 struct ocfs2_xattr_set_ctxt *ctxt) 824 { 825 int ret = 0; 826 unsigned int ext_flags; 827 u32 trunc_len, cpos, phys_cpos, alloc_size; 828 u64 block; 829 830 if (old_clusters <= new_clusters) 831 return 0; 832 833 cpos = new_clusters; 834 trunc_len = old_clusters - new_clusters; 835 while (trunc_len) { 836 ret = ocfs2_xattr_get_clusters(inode, cpos, 
&phys_cpos, 837 &alloc_size, 838 &vb->vb_xv->xr_list, &ext_flags); 839 if (ret) { 840 mlog_errno(ret); 841 goto out; 842 } 843 844 if (alloc_size > trunc_len) 845 alloc_size = trunc_len; 846 847 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 848 phys_cpos, alloc_size, 849 ext_flags, ctxt); 850 if (ret) { 851 mlog_errno(ret); 852 goto out; 853 } 854 855 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 856 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 857 block, alloc_size); 858 cpos += alloc_size; 859 trunc_len -= alloc_size; 860 } 861 862 out: 863 return ret; 864 } 865 866 static int ocfs2_xattr_value_truncate(struct inode *inode, 867 struct ocfs2_xattr_value_buf *vb, 868 int len, 869 struct ocfs2_xattr_set_ctxt *ctxt) 870 { 871 int ret; 872 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 873 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 874 875 if (new_clusters == old_clusters) 876 return 0; 877 878 if (new_clusters > old_clusters) 879 ret = ocfs2_xattr_extend_allocation(inode, 880 new_clusters - old_clusters, 881 vb, ctxt); 882 else 883 ret = ocfs2_xattr_shrink_size(inode, 884 old_clusters, new_clusters, 885 vb, ctxt); 886 887 return ret; 888 } 889 890 static int ocfs2_xattr_list_entry(char *buffer, size_t size, 891 size_t *result, const char *prefix, 892 const char *name, int name_len) 893 { 894 char *p = buffer + *result; 895 int prefix_len = strlen(prefix); 896 int total_len = prefix_len + name_len + 1; 897 898 *result += total_len; 899 900 /* we are just looking for how big our buffer needs to be */ 901 if (!size) 902 return 0; 903 904 if (*result > size) 905 return -ERANGE; 906 907 memcpy(p, prefix, prefix_len); 908 memcpy(p + prefix_len, name, name_len); 909 p[prefix_len + name_len] = '\0'; 910 911 return 0; 912 } 913 914 static int ocfs2_xattr_list_entries(struct inode *inode, 915 struct ocfs2_xattr_header *header, 916 char *buffer, size_t buffer_size) 917 { 918 size_t result = 0; 919 int i, type, ret; 920 
const char *prefix, *name; 921 922 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 923 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 924 type = ocfs2_xattr_get_type(entry); 925 prefix = ocfs2_xattr_prefix(type); 926 927 if (prefix) { 928 name = (const char *)header + 929 le16_to_cpu(entry->xe_name_offset); 930 931 ret = ocfs2_xattr_list_entry(buffer, buffer_size, 932 &result, prefix, name, 933 entry->xe_name_len); 934 if (ret) 935 return ret; 936 } 937 } 938 939 return result; 940 } 941 942 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 943 struct ocfs2_dinode *di) 944 { 945 struct ocfs2_xattr_header *xh; 946 int i; 947 948 xh = (struct ocfs2_xattr_header *) 949 ((void *)di + inode->i_sb->s_blocksize - 950 le16_to_cpu(di->i_xattr_inline_size)); 951 952 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 953 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 954 return 1; 955 956 return 0; 957 } 958 959 static int ocfs2_xattr_ibody_list(struct inode *inode, 960 struct ocfs2_dinode *di, 961 char *buffer, 962 size_t buffer_size) 963 { 964 struct ocfs2_xattr_header *header = NULL; 965 struct ocfs2_inode_info *oi = OCFS2_I(inode); 966 int ret = 0; 967 968 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 969 return ret; 970 971 header = (struct ocfs2_xattr_header *) 972 ((void *)di + inode->i_sb->s_blocksize - 973 le16_to_cpu(di->i_xattr_inline_size)); 974 975 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 976 977 return ret; 978 } 979 980 static int ocfs2_xattr_block_list(struct inode *inode, 981 struct ocfs2_dinode *di, 982 char *buffer, 983 size_t buffer_size) 984 { 985 struct buffer_head *blk_bh = NULL; 986 struct ocfs2_xattr_block *xb; 987 int ret = 0; 988 989 if (!di->i_xattr_loc) 990 return ret; 991 992 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 993 &blk_bh); 994 if (ret < 0) { 995 mlog_errno(ret); 996 return ret; 997 } 998 999 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1000 if 
(!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1001 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 1002 ret = ocfs2_xattr_list_entries(inode, header, 1003 buffer, buffer_size); 1004 } else 1005 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1006 buffer, buffer_size); 1007 1008 brelse(blk_bh); 1009 1010 return ret; 1011 } 1012 1013 ssize_t ocfs2_listxattr(struct dentry *dentry, 1014 char *buffer, 1015 size_t size) 1016 { 1017 int ret = 0, i_ret = 0, b_ret = 0; 1018 struct buffer_head *di_bh = NULL; 1019 struct ocfs2_dinode *di = NULL; 1020 struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry)); 1021 1022 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) 1023 return -EOPNOTSUPP; 1024 1025 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1026 return ret; 1027 1028 ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0); 1029 if (ret < 0) { 1030 mlog_errno(ret); 1031 return ret; 1032 } 1033 1034 di = (struct ocfs2_dinode *)di_bh->b_data; 1035 1036 down_read(&oi->ip_xattr_sem); 1037 i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size); 1038 if (i_ret < 0) 1039 b_ret = 0; 1040 else { 1041 if (buffer) { 1042 buffer += i_ret; 1043 size -= i_ret; 1044 } 1045 b_ret = ocfs2_xattr_block_list(d_inode(dentry), di, 1046 buffer, size); 1047 if (b_ret < 0) 1048 i_ret = 0; 1049 } 1050 up_read(&oi->ip_xattr_sem); 1051 ocfs2_inode_unlock(d_inode(dentry), 0); 1052 1053 brelse(di_bh); 1054 1055 return i_ret + b_ret; 1056 } 1057 1058 static int ocfs2_xattr_find_entry(int name_index, 1059 const char *name, 1060 struct ocfs2_xattr_search *xs) 1061 { 1062 struct ocfs2_xattr_entry *entry; 1063 size_t name_len; 1064 int i, cmp = 1; 1065 1066 if (name == NULL) 1067 return -EINVAL; 1068 1069 name_len = strlen(name); 1070 entry = xs->here; 1071 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 1072 cmp = name_index - ocfs2_xattr_get_type(entry); 1073 if (!cmp) 1074 cmp = name_len - entry->xe_name_len; 1075 if (!cmp) 1076 cmp = memcmp(name, (xs->base 
+ 1077 le16_to_cpu(entry->xe_name_offset)), 1078 name_len); 1079 if (cmp == 0) 1080 break; 1081 entry += 1; 1082 } 1083 xs->here = entry; 1084 1085 return cmp ? -ENODATA : 0; 1086 } 1087 1088 static int ocfs2_xattr_get_value_outside(struct inode *inode, 1089 struct ocfs2_xattr_value_root *xv, 1090 void *buffer, 1091 size_t len) 1092 { 1093 u32 cpos, p_cluster, num_clusters, bpc, clusters; 1094 u64 blkno; 1095 int i, ret = 0; 1096 size_t cplen, blocksize; 1097 struct buffer_head *bh = NULL; 1098 struct ocfs2_extent_list *el; 1099 1100 el = &xv->xr_list; 1101 clusters = le32_to_cpu(xv->xr_clusters); 1102 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1103 blocksize = inode->i_sb->s_blocksize; 1104 1105 cpos = 0; 1106 while (cpos < clusters) { 1107 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1108 &num_clusters, el, NULL); 1109 if (ret) { 1110 mlog_errno(ret); 1111 goto out; 1112 } 1113 1114 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1115 /* Copy ocfs2_xattr_value */ 1116 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1117 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1118 &bh, NULL); 1119 if (ret) { 1120 mlog_errno(ret); 1121 goto out; 1122 } 1123 1124 cplen = len >= blocksize ? 
blocksize : len; 1125 memcpy(buffer, bh->b_data, cplen); 1126 len -= cplen; 1127 buffer += cplen; 1128 1129 brelse(bh); 1130 bh = NULL; 1131 if (len == 0) 1132 break; 1133 } 1134 cpos += num_clusters; 1135 } 1136 out: 1137 return ret; 1138 } 1139 1140 static int ocfs2_xattr_ibody_get(struct inode *inode, 1141 int name_index, 1142 const char *name, 1143 void *buffer, 1144 size_t buffer_size, 1145 struct ocfs2_xattr_search *xs) 1146 { 1147 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1148 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1149 struct ocfs2_xattr_value_root *xv; 1150 size_t size; 1151 int ret = 0; 1152 1153 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 1154 return -ENODATA; 1155 1156 xs->end = (void *)di + inode->i_sb->s_blocksize; 1157 xs->header = (struct ocfs2_xattr_header *) 1158 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 1159 xs->base = (void *)xs->header; 1160 xs->here = xs->header->xh_entries; 1161 1162 ret = ocfs2_xattr_find_entry(name_index, name, xs); 1163 if (ret) 1164 return ret; 1165 size = le64_to_cpu(xs->here->xe_value_size); 1166 if (buffer) { 1167 if (size > buffer_size) 1168 return -ERANGE; 1169 if (ocfs2_xattr_is_local(xs->here)) { 1170 memcpy(buffer, (void *)xs->base + 1171 le16_to_cpu(xs->here->xe_name_offset) + 1172 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); 1173 } else { 1174 xv = (struct ocfs2_xattr_value_root *) 1175 (xs->base + le16_to_cpu( 1176 xs->here->xe_name_offset) + 1177 OCFS2_XATTR_SIZE(xs->here->xe_name_len)); 1178 ret = ocfs2_xattr_get_value_outside(inode, xv, 1179 buffer, size); 1180 if (ret < 0) { 1181 mlog_errno(ret); 1182 return ret; 1183 } 1184 } 1185 } 1186 1187 return size; 1188 } 1189 1190 static int ocfs2_xattr_block_get(struct inode *inode, 1191 int name_index, 1192 const char *name, 1193 void *buffer, 1194 size_t buffer_size, 1195 struct ocfs2_xattr_search *xs) 1196 { 1197 struct ocfs2_xattr_block *xb; 1198 struct ocfs2_xattr_value_root *xv; 1199 size_t size; 1200 
int ret = -ENODATA, name_offset, name_len, i; 1201 int uninitialized_var(block_off); 1202 1203 xs->bucket = ocfs2_xattr_bucket_new(inode); 1204 if (!xs->bucket) { 1205 ret = -ENOMEM; 1206 mlog_errno(ret); 1207 goto cleanup; 1208 } 1209 1210 ret = ocfs2_xattr_block_find(inode, name_index, name, xs); 1211 if (ret) { 1212 mlog_errno(ret); 1213 goto cleanup; 1214 } 1215 1216 if (xs->not_found) { 1217 ret = -ENODATA; 1218 goto cleanup; 1219 } 1220 1221 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 1222 size = le64_to_cpu(xs->here->xe_value_size); 1223 if (buffer) { 1224 ret = -ERANGE; 1225 if (size > buffer_size) 1226 goto cleanup; 1227 1228 name_offset = le16_to_cpu(xs->here->xe_name_offset); 1229 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); 1230 i = xs->here - xs->header->xh_entries; 1231 1232 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1233 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 1234 bucket_xh(xs->bucket), 1235 i, 1236 &block_off, 1237 &name_offset); 1238 if (ret) { 1239 mlog_errno(ret); 1240 goto cleanup; 1241 } 1242 xs->base = bucket_block(xs->bucket, block_off); 1243 } 1244 if (ocfs2_xattr_is_local(xs->here)) { 1245 memcpy(buffer, (void *)xs->base + 1246 name_offset + name_len, size); 1247 } else { 1248 xv = (struct ocfs2_xattr_value_root *) 1249 (xs->base + name_offset + name_len); 1250 ret = ocfs2_xattr_get_value_outside(inode, xv, 1251 buffer, size); 1252 if (ret < 0) { 1253 mlog_errno(ret); 1254 goto cleanup; 1255 } 1256 } 1257 } 1258 ret = size; 1259 cleanup: 1260 ocfs2_xattr_bucket_free(xs->bucket); 1261 1262 brelse(xs->xattr_bh); 1263 xs->xattr_bh = NULL; 1264 return ret; 1265 } 1266 1267 int ocfs2_xattr_get_nolock(struct inode *inode, 1268 struct buffer_head *di_bh, 1269 int name_index, 1270 const char *name, 1271 void *buffer, 1272 size_t buffer_size) 1273 { 1274 int ret; 1275 struct ocfs2_dinode *di = NULL; 1276 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1277 struct ocfs2_xattr_search xis = { 1278 .not_found = 
-ENODATA, 1279 }; 1280 struct ocfs2_xattr_search xbs = { 1281 .not_found = -ENODATA, 1282 }; 1283 1284 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 1285 return -EOPNOTSUPP; 1286 1287 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1288 return -ENODATA; 1289 1290 xis.inode_bh = xbs.inode_bh = di_bh; 1291 di = (struct ocfs2_dinode *)di_bh->b_data; 1292 1293 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 1294 buffer_size, &xis); 1295 if (ret == -ENODATA && di->i_xattr_loc) 1296 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1297 buffer_size, &xbs); 1298 1299 return ret; 1300 } 1301 1302 /* ocfs2_xattr_get() 1303 * 1304 * Copy an extended attribute into the buffer provided. 1305 * Buffer is NULL to compute the size of buffer required. 1306 */ 1307 static int ocfs2_xattr_get(struct inode *inode, 1308 int name_index, 1309 const char *name, 1310 void *buffer, 1311 size_t buffer_size) 1312 { 1313 int ret; 1314 struct buffer_head *di_bh = NULL; 1315 1316 ret = ocfs2_inode_lock(inode, &di_bh, 0); 1317 if (ret < 0) { 1318 mlog_errno(ret); 1319 return ret; 1320 } 1321 down_read(&OCFS2_I(inode)->ip_xattr_sem); 1322 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, 1323 name, buffer, buffer_size); 1324 up_read(&OCFS2_I(inode)->ip_xattr_sem); 1325 1326 ocfs2_inode_unlock(inode, 0); 1327 1328 brelse(di_bh); 1329 1330 return ret; 1331 } 1332 1333 static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1334 handle_t *handle, 1335 struct ocfs2_xattr_value_buf *vb, 1336 const void *value, 1337 int value_len) 1338 { 1339 int ret = 0, i, cp_len; 1340 u16 blocksize = inode->i_sb->s_blocksize; 1341 u32 p_cluster, num_clusters; 1342 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1343 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1344 u64 blkno; 1345 struct buffer_head *bh = NULL; 1346 unsigned int ext_flags; 1347 struct ocfs2_xattr_value_root *xv = vb->vb_xv; 1348 1349 BUG_ON(clusters > 
le32_to_cpu(xv->xr_clusters)); 1350 1351 while (cpos < clusters) { 1352 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1353 &num_clusters, &xv->xr_list, 1354 &ext_flags); 1355 if (ret) { 1356 mlog_errno(ret); 1357 goto out; 1358 } 1359 1360 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 1361 1362 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1363 1364 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1365 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1366 &bh, NULL); 1367 if (ret) { 1368 mlog_errno(ret); 1369 goto out; 1370 } 1371 1372 ret = ocfs2_journal_access(handle, 1373 INODE_CACHE(inode), 1374 bh, 1375 OCFS2_JOURNAL_ACCESS_WRITE); 1376 if (ret < 0) { 1377 mlog_errno(ret); 1378 goto out; 1379 } 1380 1381 cp_len = value_len > blocksize ? blocksize : value_len; 1382 memcpy(bh->b_data, value, cp_len); 1383 value_len -= cp_len; 1384 value += cp_len; 1385 if (cp_len < blocksize) 1386 memset(bh->b_data + cp_len, 0, 1387 blocksize - cp_len); 1388 1389 ocfs2_journal_dirty(handle, bh); 1390 brelse(bh); 1391 bh = NULL; 1392 1393 /* 1394 * XXX: do we need to empty all the following 1395 * blocks in this cluster? 
1396 */ 1397 if (!value_len) 1398 break; 1399 } 1400 cpos += num_clusters; 1401 } 1402 out: 1403 brelse(bh); 1404 1405 return ret; 1406 } 1407 1408 static int ocfs2_xa_check_space_helper(int needed_space, int free_start, 1409 int num_entries) 1410 { 1411 int free_space; 1412 1413 if (!needed_space) 1414 return 0; 1415 1416 free_space = free_start - 1417 sizeof(struct ocfs2_xattr_header) - 1418 (num_entries * sizeof(struct ocfs2_xattr_entry)) - 1419 OCFS2_XATTR_HEADER_GAP; 1420 if (free_space < 0) 1421 return -EIO; 1422 if (free_space < needed_space) 1423 return -ENOSPC; 1424 1425 return 0; 1426 } 1427 1428 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, 1429 int type) 1430 { 1431 return loc->xl_ops->xlo_journal_access(handle, loc, type); 1432 } 1433 1434 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) 1435 { 1436 loc->xl_ops->xlo_journal_dirty(handle, loc); 1437 } 1438 1439 /* Give a pointer into the storage for the given offset */ 1440 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) 1441 { 1442 BUG_ON(offset >= loc->xl_size); 1443 return loc->xl_ops->xlo_offset_pointer(loc, offset); 1444 } 1445 1446 /* 1447 * Wipe the name+value pair and allow the storage to reclaim it. This 1448 * must be followed by either removal of the entry or a call to 1449 * ocfs2_xa_add_namevalue(). 1450 */ 1451 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) 1452 { 1453 loc->xl_ops->xlo_wipe_namevalue(loc); 1454 } 1455 1456 /* 1457 * Find lowest offset to a name+value pair. This is the start of our 1458 * downward-growing free space. 1459 */ 1460 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) 1461 { 1462 return loc->xl_ops->xlo_get_free_start(loc); 1463 } 1464 1465 /* Can we reuse loc->xl_entry for xi? 
*/ 1466 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, 1467 struct ocfs2_xattr_info *xi) 1468 { 1469 return loc->xl_ops->xlo_can_reuse(loc, xi); 1470 } 1471 1472 /* How much free space is needed to set the new value */ 1473 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, 1474 struct ocfs2_xattr_info *xi) 1475 { 1476 return loc->xl_ops->xlo_check_space(loc, xi); 1477 } 1478 1479 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1480 { 1481 loc->xl_ops->xlo_add_entry(loc, name_hash); 1482 loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); 1483 /* 1484 * We can't leave the new entry's xe_name_offset at zero or 1485 * add_namevalue() will go nuts. We set it to the size of our 1486 * storage so that it can never be less than any other entry. 1487 */ 1488 loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); 1489 } 1490 1491 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, 1492 struct ocfs2_xattr_info *xi) 1493 { 1494 int size = namevalue_size_xi(xi); 1495 int nameval_offset; 1496 char *nameval_buf; 1497 1498 loc->xl_ops->xlo_add_namevalue(loc, size); 1499 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1500 loc->xl_entry->xe_name_len = xi->xi_name_len; 1501 ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); 1502 ocfs2_xattr_set_local(loc->xl_entry, 1503 xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); 1504 1505 nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1506 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 1507 memset(nameval_buf, 0, size); 1508 memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); 1509 } 1510 1511 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, 1512 struct ocfs2_xattr_value_buf *vb) 1513 { 1514 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1515 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1516 1517 /* Value bufs are for value trees */ 1518 BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); 1519 
BUG_ON(namevalue_size_xe(loc->xl_entry) != 1520 (name_size + OCFS2_XATTR_ROOT_SIZE)); 1521 1522 loc->xl_ops->xlo_fill_value_buf(loc, vb); 1523 vb->vb_xv = 1524 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, 1525 nameval_offset + 1526 name_size); 1527 } 1528 1529 static int ocfs2_xa_block_journal_access(handle_t *handle, 1530 struct ocfs2_xa_loc *loc, int type) 1531 { 1532 struct buffer_head *bh = loc->xl_storage; 1533 ocfs2_journal_access_func access; 1534 1535 if (loc->xl_size == (bh->b_size - 1536 offsetof(struct ocfs2_xattr_block, 1537 xb_attrs.xb_header))) 1538 access = ocfs2_journal_access_xb; 1539 else 1540 access = ocfs2_journal_access_di; 1541 return access(handle, INODE_CACHE(loc->xl_inode), bh, type); 1542 } 1543 1544 static void ocfs2_xa_block_journal_dirty(handle_t *handle, 1545 struct ocfs2_xa_loc *loc) 1546 { 1547 struct buffer_head *bh = loc->xl_storage; 1548 1549 ocfs2_journal_dirty(handle, bh); 1550 } 1551 1552 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, 1553 int offset) 1554 { 1555 return (char *)loc->xl_header + offset; 1556 } 1557 1558 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, 1559 struct ocfs2_xattr_info *xi) 1560 { 1561 /* 1562 * Block storage is strict. If the sizes aren't exact, we will 1563 * remove the old one and reinsert the new. 
1564 */ 1565 return namevalue_size_xe(loc->xl_entry) == 1566 namevalue_size_xi(xi); 1567 } 1568 1569 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) 1570 { 1571 struct ocfs2_xattr_header *xh = loc->xl_header; 1572 int i, count = le16_to_cpu(xh->xh_count); 1573 int offset, free_start = loc->xl_size; 1574 1575 for (i = 0; i < count; i++) { 1576 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1577 if (offset < free_start) 1578 free_start = offset; 1579 } 1580 1581 return free_start; 1582 } 1583 1584 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, 1585 struct ocfs2_xattr_info *xi) 1586 { 1587 int count = le16_to_cpu(loc->xl_header->xh_count); 1588 int free_start = ocfs2_xa_get_free_start(loc); 1589 int needed_space = ocfs2_xi_entry_usage(xi); 1590 1591 /* 1592 * Block storage will reclaim the original entry before inserting 1593 * the new value, so we only need the difference. If the new 1594 * entry is smaller than the old one, we don't need anything. 1595 */ 1596 if (loc->xl_entry) { 1597 /* Don't need space if we're reusing! */ 1598 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1599 needed_space = 0; 1600 else 1601 needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); 1602 } 1603 if (needed_space < 0) 1604 needed_space = 0; 1605 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1606 } 1607 1608 /* 1609 * Block storage for xattrs keeps the name+value pairs compacted. When 1610 * we remove one, we have to shift any that preceded it towards the end. 
1611 */ 1612 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) 1613 { 1614 int i, offset; 1615 int namevalue_offset, first_namevalue_offset, namevalue_size; 1616 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1617 struct ocfs2_xattr_header *xh = loc->xl_header; 1618 int count = le16_to_cpu(xh->xh_count); 1619 1620 namevalue_offset = le16_to_cpu(entry->xe_name_offset); 1621 namevalue_size = namevalue_size_xe(entry); 1622 first_namevalue_offset = ocfs2_xa_get_free_start(loc); 1623 1624 /* Shift the name+value pairs */ 1625 memmove((char *)xh + first_namevalue_offset + namevalue_size, 1626 (char *)xh + first_namevalue_offset, 1627 namevalue_offset - first_namevalue_offset); 1628 memset((char *)xh + first_namevalue_offset, 0, namevalue_size); 1629 1630 /* Now tell xh->xh_entries about it */ 1631 for (i = 0; i < count; i++) { 1632 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1633 if (offset <= namevalue_offset) 1634 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1635 namevalue_size); 1636 } 1637 1638 /* 1639 * Note that we don't update xh_free_start or xh_name_value_len 1640 * because they're not used in block-stored xattrs. 
1641 */ 1642 } 1643 1644 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1645 { 1646 int count = le16_to_cpu(loc->xl_header->xh_count); 1647 loc->xl_entry = &(loc->xl_header->xh_entries[count]); 1648 le16_add_cpu(&loc->xl_header->xh_count, 1); 1649 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1650 } 1651 1652 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1653 { 1654 int free_start = ocfs2_xa_get_free_start(loc); 1655 1656 loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); 1657 } 1658 1659 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, 1660 struct ocfs2_xattr_value_buf *vb) 1661 { 1662 struct buffer_head *bh = loc->xl_storage; 1663 1664 if (loc->xl_size == (bh->b_size - 1665 offsetof(struct ocfs2_xattr_block, 1666 xb_attrs.xb_header))) 1667 vb->vb_access = ocfs2_journal_access_xb; 1668 else 1669 vb->vb_access = ocfs2_journal_access_di; 1670 vb->vb_bh = bh; 1671 } 1672 1673 /* 1674 * Operations for xattrs stored in blocks. This includes inline inode 1675 * storage and unindexed ocfs2_xattr_blocks. 
1676 */ 1677 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { 1678 .xlo_journal_access = ocfs2_xa_block_journal_access, 1679 .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, 1680 .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, 1681 .xlo_check_space = ocfs2_xa_block_check_space, 1682 .xlo_can_reuse = ocfs2_xa_block_can_reuse, 1683 .xlo_get_free_start = ocfs2_xa_block_get_free_start, 1684 .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, 1685 .xlo_add_entry = ocfs2_xa_block_add_entry, 1686 .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, 1687 .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, 1688 }; 1689 1690 static int ocfs2_xa_bucket_journal_access(handle_t *handle, 1691 struct ocfs2_xa_loc *loc, int type) 1692 { 1693 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1694 1695 return ocfs2_xattr_bucket_journal_access(handle, bucket, type); 1696 } 1697 1698 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, 1699 struct ocfs2_xa_loc *loc) 1700 { 1701 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1702 1703 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 1704 } 1705 1706 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, 1707 int offset) 1708 { 1709 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1710 int block, block_offset; 1711 1712 /* The header is at the front of the bucket */ 1713 block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; 1714 block_offset = offset % loc->xl_inode->i_sb->s_blocksize; 1715 1716 return bucket_block(bucket, block) + block_offset; 1717 } 1718 1719 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, 1720 struct ocfs2_xattr_info *xi) 1721 { 1722 return namevalue_size_xe(loc->xl_entry) >= 1723 namevalue_size_xi(xi); 1724 } 1725 1726 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) 1727 { 1728 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1729 return le16_to_cpu(bucket_xh(bucket)->xh_free_start); 1730 } 1731 1732 static 
int ocfs2_bucket_align_free_start(struct super_block *sb, 1733 int free_start, int size) 1734 { 1735 /* 1736 * We need to make sure that the name+value pair fits within 1737 * one block. 1738 */ 1739 if (((free_start - size) >> sb->s_blocksize_bits) != 1740 ((free_start - 1) >> sb->s_blocksize_bits)) 1741 free_start -= free_start % sb->s_blocksize; 1742 1743 return free_start; 1744 } 1745 1746 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, 1747 struct ocfs2_xattr_info *xi) 1748 { 1749 int rc; 1750 int count = le16_to_cpu(loc->xl_header->xh_count); 1751 int free_start = ocfs2_xa_get_free_start(loc); 1752 int needed_space = ocfs2_xi_entry_usage(xi); 1753 int size = namevalue_size_xi(xi); 1754 struct super_block *sb = loc->xl_inode->i_sb; 1755 1756 /* 1757 * Bucket storage does not reclaim name+value pairs it cannot 1758 * reuse. They live as holes until the bucket fills, and then 1759 * the bucket is defragmented. However, the bucket can reclaim 1760 * the ocfs2_xattr_entry. 1761 */ 1762 if (loc->xl_entry) { 1763 /* Don't need space if we're reusing! */ 1764 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1765 needed_space = 0; 1766 else 1767 needed_space -= sizeof(struct ocfs2_xattr_entry); 1768 } 1769 BUG_ON(needed_space < 0); 1770 1771 if (free_start < size) { 1772 if (needed_space) 1773 return -ENOSPC; 1774 } else { 1775 /* 1776 * First we check if it would fit in the first place. 1777 * Below, we align the free start to a block. This may 1778 * slide us below the minimum gap. By checking unaligned 1779 * first, we avoid that error. 
1780 */ 1781 rc = ocfs2_xa_check_space_helper(needed_space, free_start, 1782 count); 1783 if (rc) 1784 return rc; 1785 free_start = ocfs2_bucket_align_free_start(sb, free_start, 1786 size); 1787 } 1788 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1789 } 1790 1791 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) 1792 { 1793 le16_add_cpu(&loc->xl_header->xh_name_value_len, 1794 -namevalue_size_xe(loc->xl_entry)); 1795 } 1796 1797 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1798 { 1799 struct ocfs2_xattr_header *xh = loc->xl_header; 1800 int count = le16_to_cpu(xh->xh_count); 1801 int low = 0, high = count - 1, tmp; 1802 struct ocfs2_xattr_entry *tmp_xe; 1803 1804 /* 1805 * We keep buckets sorted by name_hash, so we need to find 1806 * our insert place. 1807 */ 1808 while (low <= high && count) { 1809 tmp = (low + high) / 2; 1810 tmp_xe = &xh->xh_entries[tmp]; 1811 1812 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 1813 low = tmp + 1; 1814 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) 1815 high = tmp - 1; 1816 else { 1817 low = tmp; 1818 break; 1819 } 1820 } 1821 1822 if (low != count) 1823 memmove(&xh->xh_entries[low + 1], 1824 &xh->xh_entries[low], 1825 ((count - low) * sizeof(struct ocfs2_xattr_entry))); 1826 1827 le16_add_cpu(&xh->xh_count, 1); 1828 loc->xl_entry = &xh->xh_entries[low]; 1829 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1830 } 1831 1832 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1833 { 1834 int free_start = ocfs2_xa_get_free_start(loc); 1835 struct ocfs2_xattr_header *xh = loc->xl_header; 1836 struct super_block *sb = loc->xl_inode->i_sb; 1837 int nameval_offset; 1838 1839 free_start = ocfs2_bucket_align_free_start(sb, free_start, size); 1840 nameval_offset = free_start - size; 1841 loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); 1842 xh->xh_free_start = cpu_to_le16(nameval_offset); 1843 
le16_add_cpu(&xh->xh_name_value_len, size); 1844 1845 } 1846 1847 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, 1848 struct ocfs2_xattr_value_buf *vb) 1849 { 1850 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1851 struct super_block *sb = loc->xl_inode->i_sb; 1852 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1853 int size = namevalue_size_xe(loc->xl_entry); 1854 int block_offset = nameval_offset >> sb->s_blocksize_bits; 1855 1856 /* Values are not allowed to straddle block boundaries */ 1857 BUG_ON(block_offset != 1858 ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); 1859 /* We expect the bucket to be filled in */ 1860 BUG_ON(!bucket->bu_bhs[block_offset]); 1861 1862 vb->vb_access = ocfs2_journal_access; 1863 vb->vb_bh = bucket->bu_bhs[block_offset]; 1864 } 1865 1866 /* Operations for xattrs stored in buckets. */ 1867 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { 1868 .xlo_journal_access = ocfs2_xa_bucket_journal_access, 1869 .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, 1870 .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, 1871 .xlo_check_space = ocfs2_xa_bucket_check_space, 1872 .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, 1873 .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, 1874 .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, 1875 .xlo_add_entry = ocfs2_xa_bucket_add_entry, 1876 .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, 1877 .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, 1878 }; 1879 1880 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) 1881 { 1882 struct ocfs2_xattr_value_buf vb; 1883 1884 if (ocfs2_xattr_is_local(loc->xl_entry)) 1885 return 0; 1886 1887 ocfs2_xa_fill_value_buf(loc, &vb); 1888 return le32_to_cpu(vb.vb_xv->xr_clusters); 1889 } 1890 1891 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, 1892 struct ocfs2_xattr_set_ctxt *ctxt) 1893 { 1894 int trunc_rc, access_rc; 1895 struct 
ocfs2_xattr_value_buf vb; 1896 1897 ocfs2_xa_fill_value_buf(loc, &vb); 1898 trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1899 ctxt); 1900 1901 /* 1902 * The caller of ocfs2_xa_value_truncate() has already called 1903 * ocfs2_xa_journal_access on the loc. However, The truncate code 1904 * calls ocfs2_extend_trans(). This may commit the previous 1905 * transaction and open a new one. If this is a bucket, truncate 1906 * could leave only vb->vb_bh set up for journaling. Meanwhile, 1907 * the caller is expecting to dirty the entire bucket. So we must 1908 * reset the journal work. We do this even if truncate has failed, 1909 * as it could have failed after committing the extend. 1910 */ 1911 access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1912 OCFS2_JOURNAL_ACCESS_WRITE); 1913 1914 /* Errors in truncate take precedence */ 1915 return trunc_rc ? trunc_rc : access_rc; 1916 } 1917 1918 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1919 { 1920 int index, count; 1921 struct ocfs2_xattr_header *xh = loc->xl_header; 1922 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1923 1924 ocfs2_xa_wipe_namevalue(loc); 1925 loc->xl_entry = NULL; 1926 1927 le16_add_cpu(&xh->xh_count, -1); 1928 count = le16_to_cpu(xh->xh_count); 1929 1930 /* 1931 * Only zero out the entry if there are more remaining. This is 1932 * important for an empty bucket, as it keeps track of the 1933 * bucket's hash value. It doesn't hurt empty block storage. 
1934 */ 1935 if (count) { 1936 index = ((char *)entry - (char *)&xh->xh_entries) / 1937 sizeof(struct ocfs2_xattr_entry); 1938 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1939 (count - index) * sizeof(struct ocfs2_xattr_entry)); 1940 memset(&xh->xh_entries[count], 0, 1941 sizeof(struct ocfs2_xattr_entry)); 1942 } 1943 } 1944 1945 /* 1946 * If we have a problem adjusting the size of an external value during 1947 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 1948 * in an intermediate state. For example, the value may be partially 1949 * truncated. 1950 * 1951 * If the value tree hasn't changed, the extend/truncate went nowhere. 1952 * We have nothing to do. The caller can treat it as a straight error. 1953 * 1954 * If the value tree got partially truncated, we now have a corrupted 1955 * extended attribute. We're going to wipe its entry and leak the 1956 * clusters. Better to leak some storage than leave a corrupt entry. 1957 * 1958 * If the value tree grew, it obviously didn't grow enough for the 1959 * new entry. We're not going to try and reclaim those clusters either. 1960 * If there was already an external value there (orig_clusters != 0), 1961 * the new clusters are attached safely and we can just leave the old 1962 * value in place. If there was no external value there, we remove 1963 * the entry. 1964 * 1965 * This way, the xattr block we store in the journal will be consistent. 1966 * If the size change broke because of the journal, no changes will hit 1967 * disk anyway. 1968 */ 1969 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 1970 const char *what, 1971 unsigned int orig_clusters) 1972 { 1973 unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 1974 char *nameval_buf = ocfs2_xa_offset_pointer(loc, 1975 le16_to_cpu(loc->xl_entry->xe_name_offset)); 1976 1977 if (new_clusters < orig_clusters) { 1978 mlog(ML_ERROR, 1979 "Partial truncate while %s xattr %.*s. 
Leaking " 1980 "%u clusters and removing the entry\n", 1981 what, loc->xl_entry->xe_name_len, nameval_buf, 1982 orig_clusters - new_clusters); 1983 ocfs2_xa_remove_entry(loc); 1984 } else if (!orig_clusters) { 1985 mlog(ML_ERROR, 1986 "Unable to allocate an external value for xattr " 1987 "%.*s safely. Leaking %u clusters and removing the " 1988 "entry\n", 1989 loc->xl_entry->xe_name_len, nameval_buf, 1990 new_clusters - orig_clusters); 1991 ocfs2_xa_remove_entry(loc); 1992 } else if (new_clusters > orig_clusters) 1993 mlog(ML_ERROR, 1994 "Unable to grow xattr %.*s safely. %u new clusters " 1995 "have been added, but the value will not be " 1996 "modified\n", 1997 loc->xl_entry->xe_name_len, nameval_buf, 1998 new_clusters - orig_clusters); 1999 } 2000 2001 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 2002 struct ocfs2_xattr_set_ctxt *ctxt) 2003 { 2004 int rc = 0; 2005 unsigned int orig_clusters; 2006 2007 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2008 orig_clusters = ocfs2_xa_value_clusters(loc); 2009 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2010 if (rc) { 2011 mlog_errno(rc); 2012 /* 2013 * Since this is remove, we can return 0 if 2014 * ocfs2_xa_cleanup_value_truncate() is going to 2015 * wipe the entry anyway. So we check the 2016 * cluster count as well. 2017 */ 2018 if (orig_clusters != ocfs2_xa_value_clusters(loc)) 2019 rc = 0; 2020 ocfs2_xa_cleanup_value_truncate(loc, "removing", 2021 orig_clusters); 2022 if (rc) 2023 goto out; 2024 } 2025 } 2026 2027 ocfs2_xa_remove_entry(loc); 2028 2029 out: 2030 return rc; 2031 } 2032 2033 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 2034 { 2035 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 2036 char *nameval_buf; 2037 2038 nameval_buf = ocfs2_xa_offset_pointer(loc, 2039 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2040 memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 2041 } 2042 2043 /* 2044 * Take an existing entry and make it ready for the new value. 
This 2045 * won't allocate space, but it may free space. It should be ready for 2046 * ocfs2_xa_prepare_entry() to finish the work. 2047 */ 2048 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc, 2049 struct ocfs2_xattr_info *xi, 2050 struct ocfs2_xattr_set_ctxt *ctxt) 2051 { 2052 int rc = 0; 2053 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2054 unsigned int orig_clusters; 2055 char *nameval_buf; 2056 int xe_local = ocfs2_xattr_is_local(loc->xl_entry); 2057 int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE; 2058 2059 BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) != 2060 name_size); 2061 2062 nameval_buf = ocfs2_xa_offset_pointer(loc, 2063 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2064 if (xe_local) { 2065 memset(nameval_buf + name_size, 0, 2066 namevalue_size_xe(loc->xl_entry) - name_size); 2067 if (!xi_local) 2068 ocfs2_xa_install_value_root(loc); 2069 } else { 2070 orig_clusters = ocfs2_xa_value_clusters(loc); 2071 if (xi_local) { 2072 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2073 if (rc < 0) 2074 mlog_errno(rc); 2075 else 2076 memset(nameval_buf + name_size, 0, 2077 namevalue_size_xe(loc->xl_entry) - 2078 name_size); 2079 } else if (le64_to_cpu(loc->xl_entry->xe_value_size) > 2080 xi->xi_value_len) { 2081 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, 2082 ctxt); 2083 if (rc < 0) 2084 mlog_errno(rc); 2085 } 2086 2087 if (rc) { 2088 ocfs2_xa_cleanup_value_truncate(loc, "reusing", 2089 orig_clusters); 2090 goto out; 2091 } 2092 } 2093 2094 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 2095 ocfs2_xattr_set_local(loc->xl_entry, xi_local); 2096 2097 out: 2098 return rc; 2099 } 2100 2101 /* 2102 * Prepares loc->xl_entry to receive the new xattr. This includes 2103 * properly setting up the name+value pair region. If loc->xl_entry 2104 * already exists, it will take care of modifying it appropriately. 2105 * 2106 * Note that this modifies the data. You did journal_access already, 2107 * right? 
2108 */ 2109 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, 2110 struct ocfs2_xattr_info *xi, 2111 u32 name_hash, 2112 struct ocfs2_xattr_set_ctxt *ctxt) 2113 { 2114 int rc = 0; 2115 unsigned int orig_clusters; 2116 __le64 orig_value_size = 0; 2117 2118 rc = ocfs2_xa_check_space(loc, xi); 2119 if (rc) 2120 goto out; 2121 2122 if (loc->xl_entry) { 2123 if (ocfs2_xa_can_reuse_entry(loc, xi)) { 2124 orig_value_size = loc->xl_entry->xe_value_size; 2125 rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); 2126 if (rc) 2127 goto out; 2128 goto alloc_value; 2129 } 2130 2131 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2132 orig_clusters = ocfs2_xa_value_clusters(loc); 2133 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2134 if (rc) { 2135 mlog_errno(rc); 2136 ocfs2_xa_cleanup_value_truncate(loc, 2137 "overwriting", 2138 orig_clusters); 2139 goto out; 2140 } 2141 } 2142 ocfs2_xa_wipe_namevalue(loc); 2143 } else 2144 ocfs2_xa_add_entry(loc, name_hash); 2145 2146 /* 2147 * If we get here, we have a blank entry. Fill it. We grow our 2148 * name+value pair back from the end. 2149 */ 2150 ocfs2_xa_add_namevalue(loc, xi); 2151 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 2152 ocfs2_xa_install_value_root(loc); 2153 2154 alloc_value: 2155 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2156 orig_clusters = ocfs2_xa_value_clusters(loc); 2157 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); 2158 if (rc < 0) { 2159 ctxt->set_abort = 1; 2160 ocfs2_xa_cleanup_value_truncate(loc, "growing", 2161 orig_clusters); 2162 /* 2163 * If we were growing an existing value, 2164 * ocfs2_xa_cleanup_value_truncate() won't remove 2165 * the entry. We need to restore the original value 2166 * size. 2167 */ 2168 if (loc->xl_entry) { 2169 BUG_ON(!orig_value_size); 2170 loc->xl_entry->xe_value_size = orig_value_size; 2171 } 2172 mlog_errno(rc); 2173 } 2174 } 2175 2176 out: 2177 return rc; 2178 } 2179 2180 /* 2181 * Store the value portion of the name+value pair. 
This will skip 2182 * values that are stored externally. Their tree roots were set up 2183 * by ocfs2_xa_prepare_entry(). 2184 */ 2185 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, 2186 struct ocfs2_xattr_info *xi, 2187 struct ocfs2_xattr_set_ctxt *ctxt) 2188 { 2189 int rc = 0; 2190 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 2191 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2192 char *nameval_buf; 2193 struct ocfs2_xattr_value_buf vb; 2194 2195 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 2196 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2197 ocfs2_xa_fill_value_buf(loc, &vb); 2198 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, 2199 ctxt->handle, &vb, 2200 xi->xi_value, 2201 xi->xi_value_len); 2202 } else 2203 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); 2204 2205 return rc; 2206 } 2207 2208 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, 2209 struct ocfs2_xattr_info *xi, 2210 struct ocfs2_xattr_set_ctxt *ctxt) 2211 { 2212 int ret; 2213 u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, 2214 xi->xi_name_len); 2215 2216 ret = ocfs2_xa_journal_access(ctxt->handle, loc, 2217 OCFS2_JOURNAL_ACCESS_WRITE); 2218 if (ret) { 2219 mlog_errno(ret); 2220 goto out; 2221 } 2222 2223 /* 2224 * From here on out, everything is going to modify the buffer a 2225 * little. Errors are going to leave the xattr header in a 2226 * sane state. Thus, even with errors we dirty the sucker. 
2227 */ 2228 2229 /* Don't worry, we are never called with !xi_value and !xl_entry */ 2230 if (!xi->xi_value) { 2231 ret = ocfs2_xa_remove(loc, ctxt); 2232 goto out_dirty; 2233 } 2234 2235 ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); 2236 if (ret) { 2237 if (ret != -ENOSPC) 2238 mlog_errno(ret); 2239 goto out_dirty; 2240 } 2241 2242 ret = ocfs2_xa_store_value(loc, xi, ctxt); 2243 if (ret) 2244 mlog_errno(ret); 2245 2246 out_dirty: 2247 ocfs2_xa_journal_dirty(ctxt->handle, loc); 2248 2249 out: 2250 return ret; 2251 } 2252 2253 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, 2254 struct inode *inode, 2255 struct buffer_head *bh, 2256 struct ocfs2_xattr_entry *entry) 2257 { 2258 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2259 2260 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); 2261 2262 loc->xl_inode = inode; 2263 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2264 loc->xl_storage = bh; 2265 loc->xl_entry = entry; 2266 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); 2267 loc->xl_header = 2268 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - 2269 loc->xl_size); 2270 } 2271 2272 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, 2273 struct inode *inode, 2274 struct buffer_head *bh, 2275 struct ocfs2_xattr_entry *entry) 2276 { 2277 struct ocfs2_xattr_block *xb = 2278 (struct ocfs2_xattr_block *)bh->b_data; 2279 2280 BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); 2281 2282 loc->xl_inode = inode; 2283 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2284 loc->xl_storage = bh; 2285 loc->xl_header = &(xb->xb_attrs.xb_header); 2286 loc->xl_entry = entry; 2287 loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, 2288 xb_attrs.xb_header); 2289 } 2290 2291 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, 2292 struct ocfs2_xattr_bucket *bucket, 2293 struct ocfs2_xattr_entry *entry) 2294 { 2295 loc->xl_inode = bucket->bu_inode; 2296 loc->xl_ops = 
&ocfs2_xa_bucket_loc_ops; 2297 loc->xl_storage = bucket; 2298 loc->xl_header = bucket_xh(bucket); 2299 loc->xl_entry = entry; 2300 loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; 2301 } 2302 2303 /* 2304 * In xattr remove, if it is stored outside and refcounted, we may have 2305 * the chance to split the refcount tree. So need the allocators. 2306 */ 2307 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, 2308 struct ocfs2_xattr_value_root *xv, 2309 struct ocfs2_caching_info *ref_ci, 2310 struct buffer_head *ref_root_bh, 2311 struct ocfs2_alloc_context **meta_ac, 2312 int *ref_credits) 2313 { 2314 int ret, meta_add = 0; 2315 u32 p_cluster, num_clusters; 2316 unsigned int ext_flags; 2317 2318 *ref_credits = 0; 2319 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 2320 &num_clusters, 2321 &xv->xr_list, 2322 &ext_flags); 2323 if (ret) { 2324 mlog_errno(ret); 2325 goto out; 2326 } 2327 2328 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 2329 goto out; 2330 2331 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, 2332 ref_root_bh, xv, 2333 &meta_add, ref_credits); 2334 if (ret) { 2335 mlog_errno(ret); 2336 goto out; 2337 } 2338 2339 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2340 meta_add, meta_ac); 2341 if (ret) 2342 mlog_errno(ret); 2343 2344 out: 2345 return ret; 2346 } 2347 2348 static int ocfs2_remove_value_outside(struct inode*inode, 2349 struct ocfs2_xattr_value_buf *vb, 2350 struct ocfs2_xattr_header *header, 2351 struct ocfs2_caching_info *ref_ci, 2352 struct buffer_head *ref_root_bh) 2353 { 2354 int ret = 0, i, ref_credits; 2355 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2356 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2357 void *val; 2358 2359 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 2360 2361 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 2362 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 2363 2364 if (ocfs2_xattr_is_local(entry)) 2365 continue; 2366 2367 val = (void *)header + 2368 
le16_to_cpu(entry->xe_name_offset); 2369 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2370 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2371 2372 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2373 ref_ci, ref_root_bh, 2374 &ctxt.meta_ac, 2375 &ref_credits); 2376 2377 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2378 ocfs2_remove_extent_credits(osb->sb)); 2379 if (IS_ERR(ctxt.handle)) { 2380 ret = PTR_ERR(ctxt.handle); 2381 mlog_errno(ret); 2382 break; 2383 } 2384 2385 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2386 2387 ocfs2_commit_trans(osb, ctxt.handle); 2388 if (ctxt.meta_ac) { 2389 ocfs2_free_alloc_context(ctxt.meta_ac); 2390 ctxt.meta_ac = NULL; 2391 } 2392 2393 if (ret < 0) { 2394 mlog_errno(ret); 2395 break; 2396 } 2397 2398 } 2399 2400 if (ctxt.meta_ac) 2401 ocfs2_free_alloc_context(ctxt.meta_ac); 2402 ocfs2_schedule_truncate_log_flush(osb, 1); 2403 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2404 return ret; 2405 } 2406 2407 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2408 struct buffer_head *di_bh, 2409 struct ocfs2_caching_info *ref_ci, 2410 struct buffer_head *ref_root_bh) 2411 { 2412 2413 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2414 struct ocfs2_xattr_header *header; 2415 int ret; 2416 struct ocfs2_xattr_value_buf vb = { 2417 .vb_bh = di_bh, 2418 .vb_access = ocfs2_journal_access_di, 2419 }; 2420 2421 header = (struct ocfs2_xattr_header *) 2422 ((void *)di + inode->i_sb->s_blocksize - 2423 le16_to_cpu(di->i_xattr_inline_size)); 2424 2425 ret = ocfs2_remove_value_outside(inode, &vb, header, 2426 ref_ci, ref_root_bh); 2427 2428 return ret; 2429 } 2430 2431 struct ocfs2_rm_xattr_bucket_para { 2432 struct ocfs2_caching_info *ref_ci; 2433 struct buffer_head *ref_root_bh; 2434 }; 2435 2436 static int ocfs2_xattr_block_remove(struct inode *inode, 2437 struct buffer_head *blk_bh, 2438 struct ocfs2_caching_info *ref_ci, 2439 struct buffer_head *ref_root_bh) 2440 { 2441 struct ocfs2_xattr_block 
*xb; 2442 int ret = 0; 2443 struct ocfs2_xattr_value_buf vb = { 2444 .vb_bh = blk_bh, 2445 .vb_access = ocfs2_journal_access_xb, 2446 }; 2447 struct ocfs2_rm_xattr_bucket_para args = { 2448 .ref_ci = ref_ci, 2449 .ref_root_bh = ref_root_bh, 2450 }; 2451 2452 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2453 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2454 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 2455 ret = ocfs2_remove_value_outside(inode, &vb, header, 2456 ref_ci, ref_root_bh); 2457 } else 2458 ret = ocfs2_iterate_xattr_index_block(inode, 2459 blk_bh, 2460 ocfs2_rm_xattr_cluster, 2461 &args); 2462 2463 return ret; 2464 } 2465 2466 static int ocfs2_xattr_free_block(struct inode *inode, 2467 u64 block, 2468 struct ocfs2_caching_info *ref_ci, 2469 struct buffer_head *ref_root_bh) 2470 { 2471 struct inode *xb_alloc_inode; 2472 struct buffer_head *xb_alloc_bh = NULL; 2473 struct buffer_head *blk_bh = NULL; 2474 struct ocfs2_xattr_block *xb; 2475 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2476 handle_t *handle; 2477 int ret = 0; 2478 u64 blk, bg_blkno; 2479 u16 bit; 2480 2481 ret = ocfs2_read_xattr_block(inode, block, &blk_bh); 2482 if (ret < 0) { 2483 mlog_errno(ret); 2484 goto out; 2485 } 2486 2487 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); 2488 if (ret < 0) { 2489 mlog_errno(ret); 2490 goto out; 2491 } 2492 2493 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2494 blk = le64_to_cpu(xb->xb_blkno); 2495 bit = le16_to_cpu(xb->xb_suballoc_bit); 2496 if (xb->xb_suballoc_loc) 2497 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); 2498 else 2499 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2500 2501 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2502 EXTENT_ALLOC_SYSTEM_INODE, 2503 le16_to_cpu(xb->xb_suballoc_slot)); 2504 if (!xb_alloc_inode) { 2505 ret = -ENOMEM; 2506 mlog_errno(ret); 2507 goto out; 2508 } 2509 mutex_lock(&xb_alloc_inode->i_mutex); 2510 2511 ret = ocfs2_inode_lock(xb_alloc_inode, 
&xb_alloc_bh, 1); 2512 if (ret < 0) { 2513 mlog_errno(ret); 2514 goto out_mutex; 2515 } 2516 2517 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 2518 if (IS_ERR(handle)) { 2519 ret = PTR_ERR(handle); 2520 mlog_errno(ret); 2521 goto out_unlock; 2522 } 2523 2524 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, 2525 bit, bg_blkno, 1); 2526 if (ret < 0) 2527 mlog_errno(ret); 2528 2529 ocfs2_commit_trans(osb, handle); 2530 out_unlock: 2531 ocfs2_inode_unlock(xb_alloc_inode, 1); 2532 brelse(xb_alloc_bh); 2533 out_mutex: 2534 mutex_unlock(&xb_alloc_inode->i_mutex); 2535 iput(xb_alloc_inode); 2536 out: 2537 brelse(blk_bh); 2538 return ret; 2539 } 2540 2541 /* 2542 * ocfs2_xattr_remove() 2543 * 2544 * Free extended attribute resources associated with this inode. 2545 */ 2546 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) 2547 { 2548 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2549 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2550 struct ocfs2_refcount_tree *ref_tree = NULL; 2551 struct buffer_head *ref_root_bh = NULL; 2552 struct ocfs2_caching_info *ref_ci = NULL; 2553 handle_t *handle; 2554 int ret; 2555 2556 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2557 return 0; 2558 2559 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2560 return 0; 2561 2562 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) { 2563 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), 2564 le64_to_cpu(di->i_refcount_loc), 2565 1, &ref_tree, &ref_root_bh); 2566 if (ret) { 2567 mlog_errno(ret); 2568 goto out; 2569 } 2570 ref_ci = &ref_tree->rf_ci; 2571 2572 } 2573 2574 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2575 ret = ocfs2_xattr_ibody_remove(inode, di_bh, 2576 ref_ci, ref_root_bh); 2577 if (ret < 0) { 2578 mlog_errno(ret); 2579 goto out; 2580 } 2581 } 2582 2583 if (di->i_xattr_loc) { 2584 ret = ocfs2_xattr_free_block(inode, 2585 le64_to_cpu(di->i_xattr_loc), 2586 ref_ci, ref_root_bh); 2587 if (ret < 0) 
{ 2588 mlog_errno(ret); 2589 goto out; 2590 } 2591 } 2592 2593 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 2594 OCFS2_INODE_UPDATE_CREDITS); 2595 if (IS_ERR(handle)) { 2596 ret = PTR_ERR(handle); 2597 mlog_errno(ret); 2598 goto out; 2599 } 2600 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 2601 OCFS2_JOURNAL_ACCESS_WRITE); 2602 if (ret) { 2603 mlog_errno(ret); 2604 goto out_commit; 2605 } 2606 2607 di->i_xattr_loc = 0; 2608 2609 spin_lock(&oi->ip_lock); 2610 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2611 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2612 spin_unlock(&oi->ip_lock); 2613 ocfs2_update_inode_fsync_trans(handle, inode, 0); 2614 2615 ocfs2_journal_dirty(handle, di_bh); 2616 out_commit: 2617 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2618 out: 2619 if (ref_tree) 2620 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); 2621 brelse(ref_root_bh); 2622 return ret; 2623 } 2624 2625 static int ocfs2_xattr_has_space_inline(struct inode *inode, 2626 struct ocfs2_dinode *di) 2627 { 2628 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2629 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; 2630 int free; 2631 2632 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) 2633 return 0; 2634 2635 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2636 struct ocfs2_inline_data *idata = &di->id2.i_data; 2637 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); 2638 } else if (ocfs2_inode_is_fast_symlink(inode)) { 2639 free = ocfs2_fast_symlink_chars(inode->i_sb) - 2640 le64_to_cpu(di->i_size); 2641 } else { 2642 struct ocfs2_extent_list *el = &di->id2.i_list; 2643 free = (le16_to_cpu(el->l_count) - 2644 le16_to_cpu(el->l_next_free_rec)) * 2645 sizeof(struct ocfs2_extent_rec); 2646 } 2647 if (free >= xattrsize) 2648 return 1; 2649 2650 return 0; 2651 } 2652 2653 /* 2654 * ocfs2_xattr_ibody_find() 2655 * 2656 * Find extended attribute in inode block and 2657 * fill search 
info into struct ocfs2_xattr_search. 2658 */ 2659 static int ocfs2_xattr_ibody_find(struct inode *inode, 2660 int name_index, 2661 const char *name, 2662 struct ocfs2_xattr_search *xs) 2663 { 2664 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2665 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2666 int ret; 2667 int has_space = 0; 2668 2669 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2670 return 0; 2671 2672 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2673 down_read(&oi->ip_alloc_sem); 2674 has_space = ocfs2_xattr_has_space_inline(inode, di); 2675 up_read(&oi->ip_alloc_sem); 2676 if (!has_space) 2677 return 0; 2678 } 2679 2680 xs->xattr_bh = xs->inode_bh; 2681 xs->end = (void *)di + inode->i_sb->s_blocksize; 2682 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) 2683 xs->header = (struct ocfs2_xattr_header *) 2684 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 2685 else 2686 xs->header = (struct ocfs2_xattr_header *) 2687 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); 2688 xs->base = (void *)xs->header; 2689 xs->here = xs->header->xh_entries; 2690 2691 /* Find the named attribute. 
*/ 2692 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2693 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2694 if (ret && ret != -ENODATA) 2695 return ret; 2696 xs->not_found = ret; 2697 } 2698 2699 return 0; 2700 } 2701 2702 static int ocfs2_xattr_ibody_init(struct inode *inode, 2703 struct buffer_head *di_bh, 2704 struct ocfs2_xattr_set_ctxt *ctxt) 2705 { 2706 int ret; 2707 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2708 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2709 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2710 unsigned int xattrsize = osb->s_xattr_inline_size; 2711 2712 if (!ocfs2_xattr_has_space_inline(inode, di)) { 2713 ret = -ENOSPC; 2714 goto out; 2715 } 2716 2717 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, 2718 OCFS2_JOURNAL_ACCESS_WRITE); 2719 if (ret) { 2720 mlog_errno(ret); 2721 goto out; 2722 } 2723 2724 /* 2725 * Adjust extent record count or inline data size 2726 * to reserve space for extended attribute. 2727 */ 2728 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2729 struct ocfs2_inline_data *idata = &di->id2.i_data; 2730 le16_add_cpu(&idata->id_count, -xattrsize); 2731 } else if (!(ocfs2_inode_is_fast_symlink(inode))) { 2732 struct ocfs2_extent_list *el = &di->id2.i_list; 2733 le16_add_cpu(&el->l_count, -(xattrsize / 2734 sizeof(struct ocfs2_extent_rec))); 2735 } 2736 di->i_xattr_inline_size = cpu_to_le16(xattrsize); 2737 2738 spin_lock(&oi->ip_lock); 2739 oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; 2740 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2741 spin_unlock(&oi->ip_lock); 2742 2743 ocfs2_journal_dirty(ctxt->handle, di_bh); 2744 2745 out: 2746 return ret; 2747 } 2748 2749 /* 2750 * ocfs2_xattr_ibody_set() 2751 * 2752 * Set, replace or remove an extended attribute into inode block. 
2753 * 2754 */ 2755 static int ocfs2_xattr_ibody_set(struct inode *inode, 2756 struct ocfs2_xattr_info *xi, 2757 struct ocfs2_xattr_search *xs, 2758 struct ocfs2_xattr_set_ctxt *ctxt) 2759 { 2760 int ret; 2761 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2762 struct ocfs2_xa_loc loc; 2763 2764 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2765 return -ENOSPC; 2766 2767 down_write(&oi->ip_alloc_sem); 2768 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2769 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2770 if (ret) { 2771 if (ret != -ENOSPC) 2772 mlog_errno(ret); 2773 goto out; 2774 } 2775 } 2776 2777 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2778 xs->not_found ? NULL : xs->here); 2779 ret = ocfs2_xa_set(&loc, xi, ctxt); 2780 if (ret) { 2781 if (ret != -ENOSPC) 2782 mlog_errno(ret); 2783 goto out; 2784 } 2785 xs->here = loc.xl_entry; 2786 2787 out: 2788 up_write(&oi->ip_alloc_sem); 2789 2790 return ret; 2791 } 2792 2793 /* 2794 * ocfs2_xattr_block_find() 2795 * 2796 * Find extended attribute in external block and 2797 * fill search info into struct ocfs2_xattr_search. 
2798 */ 2799 static int ocfs2_xattr_block_find(struct inode *inode, 2800 int name_index, 2801 const char *name, 2802 struct ocfs2_xattr_search *xs) 2803 { 2804 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2805 struct buffer_head *blk_bh = NULL; 2806 struct ocfs2_xattr_block *xb; 2807 int ret = 0; 2808 2809 if (!di->i_xattr_loc) 2810 return ret; 2811 2812 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 2813 &blk_bh); 2814 if (ret < 0) { 2815 mlog_errno(ret); 2816 return ret; 2817 } 2818 2819 xs->xattr_bh = blk_bh; 2820 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2821 2822 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2823 xs->header = &xb->xb_attrs.xb_header; 2824 xs->base = (void *)xs->header; 2825 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; 2826 xs->here = xs->header->xh_entries; 2827 2828 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2829 } else 2830 ret = ocfs2_xattr_index_block_find(inode, blk_bh, 2831 name_index, 2832 name, xs); 2833 2834 if (ret && ret != -ENODATA) { 2835 xs->xattr_bh = NULL; 2836 goto cleanup; 2837 } 2838 xs->not_found = ret; 2839 return 0; 2840 cleanup: 2841 brelse(blk_bh); 2842 2843 return ret; 2844 } 2845 2846 static int ocfs2_create_xattr_block(struct inode *inode, 2847 struct buffer_head *inode_bh, 2848 struct ocfs2_xattr_set_ctxt *ctxt, 2849 int indexed, 2850 struct buffer_head **ret_bh) 2851 { 2852 int ret; 2853 u16 suballoc_bit_start; 2854 u32 num_got; 2855 u64 suballoc_loc, first_blkno; 2856 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; 2857 struct buffer_head *new_bh = NULL; 2858 struct ocfs2_xattr_block *xblk; 2859 2860 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 2861 inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); 2862 if (ret < 0) { 2863 mlog_errno(ret); 2864 goto end; 2865 } 2866 2867 ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1, 2868 &suballoc_loc, &suballoc_bit_start, 2869 &num_got, &first_blkno); 2870 if 
(ret < 0) { 2871 mlog_errno(ret); 2872 goto end; 2873 } 2874 2875 new_bh = sb_getblk(inode->i_sb, first_blkno); 2876 if (!new_bh) { 2877 ret = -ENOMEM; 2878 mlog_errno(ret); 2879 goto end; 2880 } 2881 2882 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); 2883 2884 ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), 2885 new_bh, 2886 OCFS2_JOURNAL_ACCESS_CREATE); 2887 if (ret < 0) { 2888 mlog_errno(ret); 2889 goto end; 2890 } 2891 2892 /* Initialize ocfs2_xattr_block */ 2893 xblk = (struct ocfs2_xattr_block *)new_bh->b_data; 2894 memset(xblk, 0, inode->i_sb->s_blocksize); 2895 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2896 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2897 xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc); 2898 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2899 xblk->xb_fs_generation = 2900 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation); 2901 xblk->xb_blkno = cpu_to_le64(first_blkno); 2902 if (indexed) { 2903 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2904 xr->xt_clusters = cpu_to_le32(1); 2905 xr->xt_last_eb_blk = 0; 2906 xr->xt_list.l_tree_depth = 0; 2907 xr->xt_list.l_count = cpu_to_le16( 2908 ocfs2_xattr_recs_per_xb(inode->i_sb)); 2909 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 2910 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); 2911 } 2912 ocfs2_journal_dirty(ctxt->handle, new_bh); 2913 2914 /* Add it to the inode */ 2915 di->i_xattr_loc = cpu_to_le64(first_blkno); 2916 2917 spin_lock(&OCFS2_I(inode)->ip_lock); 2918 OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 2919 di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); 2920 spin_unlock(&OCFS2_I(inode)->ip_lock); 2921 2922 ocfs2_journal_dirty(ctxt->handle, inode_bh); 2923 2924 *ret_bh = new_bh; 2925 new_bh = NULL; 2926 2927 end: 2928 brelse(new_bh); 2929 return ret; 2930 } 2931 2932 /* 2933 * ocfs2_xattr_block_set() 2934 * 2935 * Set, replace or remove an extended attribute into 
external block. 2936 * 2937 */ 2938 static int ocfs2_xattr_block_set(struct inode *inode, 2939 struct ocfs2_xattr_info *xi, 2940 struct ocfs2_xattr_search *xs, 2941 struct ocfs2_xattr_set_ctxt *ctxt) 2942 { 2943 struct buffer_head *new_bh = NULL; 2944 struct ocfs2_xattr_block *xblk = NULL; 2945 int ret; 2946 struct ocfs2_xa_loc loc; 2947 2948 if (!xs->xattr_bh) { 2949 ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt, 2950 0, &new_bh); 2951 if (ret) { 2952 mlog_errno(ret); 2953 goto end; 2954 } 2955 2956 xs->xattr_bh = new_bh; 2957 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2958 xs->header = &xblk->xb_attrs.xb_header; 2959 xs->base = (void *)xs->header; 2960 xs->end = (void *)xblk + inode->i_sb->s_blocksize; 2961 xs->here = xs->header->xh_entries; 2962 } else 2963 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2964 2965 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { 2966 ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh, 2967 xs->not_found ? NULL : xs->here); 2968 2969 ret = ocfs2_xa_set(&loc, xi, ctxt); 2970 if (!ret) 2971 xs->here = loc.xl_entry; 2972 else if ((ret != -ENOSPC) || ctxt->set_abort) 2973 goto end; 2974 else { 2975 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); 2976 if (ret) 2977 goto end; 2978 } 2979 } 2980 2981 if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED) 2982 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt); 2983 2984 end: 2985 return ret; 2986 } 2987 2988 /* Check whether the new xattr can be inserted into the inode. 
*/ 2989 static int ocfs2_xattr_can_be_in_inode(struct inode *inode, 2990 struct ocfs2_xattr_info *xi, 2991 struct ocfs2_xattr_search *xs) 2992 { 2993 struct ocfs2_xattr_entry *last; 2994 int free, i; 2995 size_t min_offs = xs->end - xs->base; 2996 2997 if (!xs->header) 2998 return 0; 2999 3000 last = xs->header->xh_entries; 3001 3002 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 3003 size_t offs = le16_to_cpu(last->xe_name_offset); 3004 if (offs < min_offs) 3005 min_offs = offs; 3006 last += 1; 3007 } 3008 3009 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; 3010 if (free < 0) 3011 return 0; 3012 3013 BUG_ON(!xs->not_found); 3014 3015 if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi))) 3016 return 1; 3017 3018 return 0; 3019 } 3020 3021 static int ocfs2_calc_xattr_set_need(struct inode *inode, 3022 struct ocfs2_dinode *di, 3023 struct ocfs2_xattr_info *xi, 3024 struct ocfs2_xattr_search *xis, 3025 struct ocfs2_xattr_search *xbs, 3026 int *clusters_need, 3027 int *meta_need, 3028 int *credits_need) 3029 { 3030 int ret = 0, old_in_xb = 0; 3031 int clusters_add = 0, meta_add = 0, credits = 0; 3032 struct buffer_head *bh = NULL; 3033 struct ocfs2_xattr_block *xb = NULL; 3034 struct ocfs2_xattr_entry *xe = NULL; 3035 struct ocfs2_xattr_value_root *xv = NULL; 3036 char *base = NULL; 3037 int name_offset, name_len = 0; 3038 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3039 xi->xi_value_len); 3040 u64 value_size; 3041 3042 /* 3043 * Calculate the clusters we need to write. 3044 * No matter whether we replace an old one or add a new one, 3045 * we need this for writing. 
3046 */ 3047 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 3048 credits += new_clusters * 3049 ocfs2_clusters_to_blocks(inode->i_sb, 1); 3050 3051 if (xis->not_found && xbs->not_found) { 3052 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3053 3054 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3055 clusters_add += new_clusters; 3056 credits += ocfs2_calc_extend_credits(inode->i_sb, 3057 &def_xv.xv.xr_list); 3058 } 3059 3060 goto meta_guess; 3061 } 3062 3063 if (!xis->not_found) { 3064 xe = xis->here; 3065 name_offset = le16_to_cpu(xe->xe_name_offset); 3066 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3067 base = xis->base; 3068 credits += OCFS2_INODE_UPDATE_CREDITS; 3069 } else { 3070 int i, block_off = 0; 3071 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3072 xe = xbs->here; 3073 name_offset = le16_to_cpu(xe->xe_name_offset); 3074 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3075 i = xbs->here - xbs->header->xh_entries; 3076 old_in_xb = 1; 3077 3078 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3079 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3080 bucket_xh(xbs->bucket), 3081 i, &block_off, 3082 &name_offset); 3083 base = bucket_block(xbs->bucket, block_off); 3084 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3085 } else { 3086 base = xbs->base; 3087 credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS; 3088 } 3089 } 3090 3091 /* 3092 * delete a xattr doesn't need metadata and cluster allocation. 3093 * so just calculate the credits and return. 3094 * 3095 * The credits for removing the value tree will be extended 3096 * by ocfs2_remove_extent itself. 3097 */ 3098 if (!xi->xi_value) { 3099 if (!ocfs2_xattr_is_local(xe)) 3100 credits += ocfs2_remove_extent_credits(inode->i_sb); 3101 3102 goto out; 3103 } 3104 3105 /* do cluster allocation guess first. 
*/ 3106 value_size = le64_to_cpu(xe->xe_value_size); 3107 3108 if (old_in_xb) { 3109 /* 3110 * In xattr set, we always try to set the xe in inode first, 3111 * so if it can be inserted into inode successfully, the old 3112 * one will be removed from the xattr block, and this xattr 3113 * will be inserted into inode as a new xattr in inode. 3114 */ 3115 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { 3116 clusters_add += new_clusters; 3117 credits += ocfs2_remove_extent_credits(inode->i_sb) + 3118 OCFS2_INODE_UPDATE_CREDITS; 3119 if (!ocfs2_xattr_is_local(xe)) 3120 credits += ocfs2_calc_extend_credits( 3121 inode->i_sb, 3122 &def_xv.xv.xr_list); 3123 goto out; 3124 } 3125 } 3126 3127 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3128 /* the new values will be stored outside. */ 3129 u32 old_clusters = 0; 3130 3131 if (!ocfs2_xattr_is_local(xe)) { 3132 old_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3133 value_size); 3134 xv = (struct ocfs2_xattr_value_root *) 3135 (base + name_offset + name_len); 3136 value_size = OCFS2_XATTR_ROOT_SIZE; 3137 } else 3138 xv = &def_xv.xv; 3139 3140 if (old_clusters >= new_clusters) { 3141 credits += ocfs2_remove_extent_credits(inode->i_sb); 3142 goto out; 3143 } else { 3144 meta_add += ocfs2_extend_meta_needed(&xv->xr_list); 3145 clusters_add += new_clusters - old_clusters; 3146 credits += ocfs2_calc_extend_credits(inode->i_sb, 3147 &xv->xr_list); 3148 if (value_size >= OCFS2_XATTR_ROOT_SIZE) 3149 goto out; 3150 } 3151 } else { 3152 /* 3153 * Now the new value will be stored inside. So if the new 3154 * value is smaller than the size of value root or the old 3155 * value, we don't need any allocation, otherwise we have 3156 * to guess metadata allocation. 3157 */ 3158 if ((ocfs2_xattr_is_local(xe) && 3159 (value_size >= xi->xi_value_len)) || 3160 (!ocfs2_xattr_is_local(xe) && 3161 OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len)) 3162 goto out; 3163 } 3164 3165 meta_guess: 3166 /* calculate metadata allocation. 
*/ 3167 if (di->i_xattr_loc) { 3168 if (!xbs->xattr_bh) { 3169 ret = ocfs2_read_xattr_block(inode, 3170 le64_to_cpu(di->i_xattr_loc), 3171 &bh); 3172 if (ret) { 3173 mlog_errno(ret); 3174 goto out; 3175 } 3176 3177 xb = (struct ocfs2_xattr_block *)bh->b_data; 3178 } else 3179 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3180 3181 /* 3182 * If there is already an xattr tree, good, we can calculate 3183 * like other b-trees. Otherwise we may have the chance of 3184 * create a tree, the credit calculation is borrowed from 3185 * ocfs2_calc_extend_credits with root_el = NULL. And the 3186 * new tree will be cluster based, so no meta is needed. 3187 */ 3188 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3189 struct ocfs2_extent_list *el = 3190 &xb->xb_attrs.xb_root.xt_list; 3191 meta_add += ocfs2_extend_meta_needed(el); 3192 credits += ocfs2_calc_extend_credits(inode->i_sb, 3193 el); 3194 } else 3195 credits += OCFS2_SUBALLOC_ALLOC + 1; 3196 3197 /* 3198 * This cluster will be used either for new bucket or for 3199 * new xattr block. 3200 * If the cluster size is the same as the bucket size, one 3201 * more is needed since we may need to extend the bucket 3202 * also. 
3203 */ 3204 clusters_add += 1; 3205 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3206 if (OCFS2_XATTR_BUCKET_SIZE == 3207 OCFS2_SB(inode->i_sb)->s_clustersize) { 3208 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3209 clusters_add += 1; 3210 } 3211 } else { 3212 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 3213 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3214 struct ocfs2_extent_list *el = &def_xv.xv.xr_list; 3215 meta_add += ocfs2_extend_meta_needed(el); 3216 credits += ocfs2_calc_extend_credits(inode->i_sb, 3217 el); 3218 } else { 3219 meta_add += 1; 3220 } 3221 } 3222 out: 3223 if (clusters_need) 3224 *clusters_need = clusters_add; 3225 if (meta_need) 3226 *meta_need = meta_add; 3227 if (credits_need) 3228 *credits_need = credits; 3229 brelse(bh); 3230 return ret; 3231 } 3232 3233 static int ocfs2_init_xattr_set_ctxt(struct inode *inode, 3234 struct ocfs2_dinode *di, 3235 struct ocfs2_xattr_info *xi, 3236 struct ocfs2_xattr_search *xis, 3237 struct ocfs2_xattr_search *xbs, 3238 struct ocfs2_xattr_set_ctxt *ctxt, 3239 int extra_meta, 3240 int *credits) 3241 { 3242 int clusters_add, meta_add, ret; 3243 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3244 3245 memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt)); 3246 3247 ocfs2_init_dealloc_ctxt(&ctxt->dealloc); 3248 3249 ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, 3250 &clusters_add, &meta_add, credits); 3251 if (ret) { 3252 mlog_errno(ret); 3253 return ret; 3254 } 3255 3256 meta_add += extra_meta; 3257 trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add, 3258 clusters_add, *credits); 3259 3260 if (meta_add) { 3261 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, 3262 &ctxt->meta_ac); 3263 if (ret) { 3264 mlog_errno(ret); 3265 goto out; 3266 } 3267 } 3268 3269 if (clusters_add) { 3270 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac); 3271 if (ret) 3272 mlog_errno(ret); 3273 } 3274 out: 3275 if (ret) { 3276 if (ctxt->meta_ac) { 3277 
ocfs2_free_alloc_context(ctxt->meta_ac); 3278 ctxt->meta_ac = NULL; 3279 } 3280 3281 /* 3282 * We cannot have an error and a non null ctxt->data_ac. 3283 */ 3284 } 3285 3286 return ret; 3287 } 3288 3289 static int __ocfs2_xattr_set_handle(struct inode *inode, 3290 struct ocfs2_dinode *di, 3291 struct ocfs2_xattr_info *xi, 3292 struct ocfs2_xattr_search *xis, 3293 struct ocfs2_xattr_search *xbs, 3294 struct ocfs2_xattr_set_ctxt *ctxt) 3295 { 3296 int ret = 0, credits, old_found; 3297 3298 if (!xi->xi_value) { 3299 /* Remove existing extended attribute */ 3300 if (!xis->not_found) 3301 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3302 else if (!xbs->not_found) 3303 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3304 } else { 3305 /* We always try to set extended attribute into inode first*/ 3306 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3307 if (!ret && !xbs->not_found) { 3308 /* 3309 * If succeed and that extended attribute existing in 3310 * external block, then we will remove it. 
3311 */ 3312 xi->xi_value = NULL; 3313 xi->xi_value_len = 0; 3314 3315 old_found = xis->not_found; 3316 xis->not_found = -ENODATA; 3317 ret = ocfs2_calc_xattr_set_need(inode, 3318 di, 3319 xi, 3320 xis, 3321 xbs, 3322 NULL, 3323 NULL, 3324 &credits); 3325 xis->not_found = old_found; 3326 if (ret) { 3327 mlog_errno(ret); 3328 goto out; 3329 } 3330 3331 ret = ocfs2_extend_trans(ctxt->handle, credits); 3332 if (ret) { 3333 mlog_errno(ret); 3334 goto out; 3335 } 3336 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3337 } else if ((ret == -ENOSPC) && !ctxt->set_abort) { 3338 if (di->i_xattr_loc && !xbs->xattr_bh) { 3339 ret = ocfs2_xattr_block_find(inode, 3340 xi->xi_name_index, 3341 xi->xi_name, xbs); 3342 if (ret) 3343 goto out; 3344 3345 old_found = xis->not_found; 3346 xis->not_found = -ENODATA; 3347 ret = ocfs2_calc_xattr_set_need(inode, 3348 di, 3349 xi, 3350 xis, 3351 xbs, 3352 NULL, 3353 NULL, 3354 &credits); 3355 xis->not_found = old_found; 3356 if (ret) { 3357 mlog_errno(ret); 3358 goto out; 3359 } 3360 3361 ret = ocfs2_extend_trans(ctxt->handle, credits); 3362 if (ret) { 3363 mlog_errno(ret); 3364 goto out; 3365 } 3366 } 3367 /* 3368 * If no space in inode, we will set extended attribute 3369 * into external block. 3370 */ 3371 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3372 if (ret) 3373 goto out; 3374 if (!xis->not_found) { 3375 /* 3376 * If succeed and that extended attribute 3377 * existing in inode, we will remove it. 3378 */ 3379 xi->xi_value = NULL; 3380 xi->xi_value_len = 0; 3381 xbs->not_found = -ENODATA; 3382 ret = ocfs2_calc_xattr_set_need(inode, 3383 di, 3384 xi, 3385 xis, 3386 xbs, 3387 NULL, 3388 NULL, 3389 &credits); 3390 if (ret) { 3391 mlog_errno(ret); 3392 goto out; 3393 } 3394 3395 ret = ocfs2_extend_trans(ctxt->handle, credits); 3396 if (ret) { 3397 mlog_errno(ret); 3398 goto out; 3399 } 3400 ret = ocfs2_xattr_ibody_set(inode, xi, 3401 xis, ctxt); 3402 } 3403 } 3404 } 3405 3406 if (!ret) { 3407 /* Update inode ctime. 
*/ 3408 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 3409 xis->inode_bh, 3410 OCFS2_JOURNAL_ACCESS_WRITE); 3411 if (ret) { 3412 mlog_errno(ret); 3413 goto out; 3414 } 3415 3416 inode->i_ctime = CURRENT_TIME; 3417 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 3418 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 3419 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh); 3420 } 3421 out: 3422 return ret; 3423 } 3424 3425 /* 3426 * This function only called duing creating inode 3427 * for init security/acl xattrs of the new inode. 3428 * All transanction credits have been reserved in mknod. 3429 */ 3430 int ocfs2_xattr_set_handle(handle_t *handle, 3431 struct inode *inode, 3432 struct buffer_head *di_bh, 3433 int name_index, 3434 const char *name, 3435 const void *value, 3436 size_t value_len, 3437 int flags, 3438 struct ocfs2_alloc_context *meta_ac, 3439 struct ocfs2_alloc_context *data_ac) 3440 { 3441 struct ocfs2_dinode *di; 3442 int ret; 3443 3444 struct ocfs2_xattr_info xi = { 3445 .xi_name_index = name_index, 3446 .xi_name = name, 3447 .xi_name_len = strlen(name), 3448 .xi_value = value, 3449 .xi_value_len = value_len, 3450 }; 3451 3452 struct ocfs2_xattr_search xis = { 3453 .not_found = -ENODATA, 3454 }; 3455 3456 struct ocfs2_xattr_search xbs = { 3457 .not_found = -ENODATA, 3458 }; 3459 3460 struct ocfs2_xattr_set_ctxt ctxt = { 3461 .handle = handle, 3462 .meta_ac = meta_ac, 3463 .data_ac = data_ac, 3464 }; 3465 3466 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3467 return -EOPNOTSUPP; 3468 3469 /* 3470 * In extreme situation, may need xattr bucket when 3471 * block size is too small. And we have already reserved 3472 * the credits for bucket in mknod. 
3473 */ 3474 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) { 3475 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3476 if (!xbs.bucket) { 3477 mlog_errno(-ENOMEM); 3478 return -ENOMEM; 3479 } 3480 } 3481 3482 xis.inode_bh = xbs.inode_bh = di_bh; 3483 di = (struct ocfs2_dinode *)di_bh->b_data; 3484 3485 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3486 3487 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3488 if (ret) 3489 goto cleanup; 3490 if (xis.not_found) { 3491 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3492 if (ret) 3493 goto cleanup; 3494 } 3495 3496 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3497 3498 cleanup: 3499 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3500 brelse(xbs.xattr_bh); 3501 ocfs2_xattr_bucket_free(xbs.bucket); 3502 3503 return ret; 3504 } 3505 3506 /* 3507 * ocfs2_xattr_set() 3508 * 3509 * Set, replace or remove an extended attribute for this inode. 3510 * value is NULL to remove an existing extended attribute, else either 3511 * create or replace an extended attribute. 
3512 */ 3513 int ocfs2_xattr_set(struct inode *inode, 3514 int name_index, 3515 const char *name, 3516 const void *value, 3517 size_t value_len, 3518 int flags) 3519 { 3520 struct buffer_head *di_bh = NULL; 3521 struct ocfs2_dinode *di; 3522 int ret, credits, ref_meta = 0, ref_credits = 0; 3523 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3524 struct inode *tl_inode = osb->osb_tl_inode; 3525 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; 3526 struct ocfs2_refcount_tree *ref_tree = NULL; 3527 3528 struct ocfs2_xattr_info xi = { 3529 .xi_name_index = name_index, 3530 .xi_name = name, 3531 .xi_name_len = strlen(name), 3532 .xi_value = value, 3533 .xi_value_len = value_len, 3534 }; 3535 3536 struct ocfs2_xattr_search xis = { 3537 .not_found = -ENODATA, 3538 }; 3539 3540 struct ocfs2_xattr_search xbs = { 3541 .not_found = -ENODATA, 3542 }; 3543 3544 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3545 return -EOPNOTSUPP; 3546 3547 /* 3548 * Only xbs will be used on indexed trees. xis doesn't need a 3549 * bucket. 3550 */ 3551 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3552 if (!xbs.bucket) { 3553 mlog_errno(-ENOMEM); 3554 return -ENOMEM; 3555 } 3556 3557 ret = ocfs2_inode_lock(inode, &di_bh, 1); 3558 if (ret < 0) { 3559 mlog_errno(ret); 3560 goto cleanup_nolock; 3561 } 3562 xis.inode_bh = xbs.inode_bh = di_bh; 3563 di = (struct ocfs2_dinode *)di_bh->b_data; 3564 3565 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3566 /* 3567 * Scan inode and external block to find the same name 3568 * extended attribute and collect search information. 
3569 */ 3570 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3571 if (ret) 3572 goto cleanup; 3573 if (xis.not_found) { 3574 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3575 if (ret) 3576 goto cleanup; 3577 } 3578 3579 if (xis.not_found && xbs.not_found) { 3580 ret = -ENODATA; 3581 if (flags & XATTR_REPLACE) 3582 goto cleanup; 3583 ret = 0; 3584 if (!value) 3585 goto cleanup; 3586 } else { 3587 ret = -EEXIST; 3588 if (flags & XATTR_CREATE) 3589 goto cleanup; 3590 } 3591 3592 /* Check whether the value is refcounted and do some preparation. */ 3593 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL && 3594 (!xis.not_found || !xbs.not_found)) { 3595 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3596 &xis, &xbs, &ref_tree, 3597 &ref_meta, &ref_credits); 3598 if (ret) { 3599 mlog_errno(ret); 3600 goto cleanup; 3601 } 3602 } 3603 3604 mutex_lock(&tl_inode->i_mutex); 3605 3606 if (ocfs2_truncate_log_needs_flush(osb)) { 3607 ret = __ocfs2_flush_truncate_log(osb); 3608 if (ret < 0) { 3609 mutex_unlock(&tl_inode->i_mutex); 3610 mlog_errno(ret); 3611 goto cleanup; 3612 } 3613 } 3614 mutex_unlock(&tl_inode->i_mutex); 3615 3616 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3617 &xbs, &ctxt, ref_meta, &credits); 3618 if (ret) { 3619 mlog_errno(ret); 3620 goto cleanup; 3621 } 3622 3623 /* we need to update inode's ctime field, so add credit for it. 
*/ 3624 credits += OCFS2_INODE_UPDATE_CREDITS; 3625 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 3626 if (IS_ERR(ctxt.handle)) { 3627 ret = PTR_ERR(ctxt.handle); 3628 mlog_errno(ret); 3629 goto out_free_ac; 3630 } 3631 3632 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3633 ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0); 3634 3635 ocfs2_commit_trans(osb, ctxt.handle); 3636 3637 out_free_ac: 3638 if (ctxt.data_ac) 3639 ocfs2_free_alloc_context(ctxt.data_ac); 3640 if (ctxt.meta_ac) 3641 ocfs2_free_alloc_context(ctxt.meta_ac); 3642 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3643 ocfs2_schedule_truncate_log_flush(osb, 1); 3644 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3645 3646 cleanup: 3647 if (ref_tree) 3648 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3649 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3650 if (!value && !ret) { 3651 ret = ocfs2_try_remove_refcount_tree(inode, di_bh); 3652 if (ret) 3653 mlog_errno(ret); 3654 } 3655 ocfs2_inode_unlock(inode, 1); 3656 cleanup_nolock: 3657 brelse(di_bh); 3658 brelse(xbs.xattr_bh); 3659 ocfs2_xattr_bucket_free(xbs.bucket); 3660 3661 return ret; 3662 } 3663 3664 /* 3665 * Find the xattr extent rec which may contains name_hash. 3666 * e_cpos will be the first name hash of the xattr rec. 3667 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. 
3668 */ 3669 static int ocfs2_xattr_get_rec(struct inode *inode, 3670 u32 name_hash, 3671 u64 *p_blkno, 3672 u32 *e_cpos, 3673 u32 *num_clusters, 3674 struct ocfs2_extent_list *el) 3675 { 3676 int ret = 0, i; 3677 struct buffer_head *eb_bh = NULL; 3678 struct ocfs2_extent_block *eb; 3679 struct ocfs2_extent_rec *rec = NULL; 3680 u64 e_blkno = 0; 3681 3682 if (el->l_tree_depth) { 3683 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3684 &eb_bh); 3685 if (ret) { 3686 mlog_errno(ret); 3687 goto out; 3688 } 3689 3690 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3691 el = &eb->h_list; 3692 3693 if (el->l_tree_depth) { 3694 ret = ocfs2_error(inode->i_sb, 3695 "Inode %lu has non zero tree depth in " 3696 "xattr tree block %llu\n", inode->i_ino, 3697 (unsigned long long)eb_bh->b_blocknr); 3698 goto out; 3699 } 3700 } 3701 3702 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3703 rec = &el->l_recs[i]; 3704 3705 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3706 e_blkno = le64_to_cpu(rec->e_blkno); 3707 break; 3708 } 3709 } 3710 3711 if (!e_blkno) { 3712 ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent " 3713 "record (%u, %u, 0) in xattr", inode->i_ino, 3714 le32_to_cpu(rec->e_cpos), 3715 ocfs2_rec_clusters(el, rec)); 3716 goto out; 3717 } 3718 3719 *p_blkno = le64_to_cpu(rec->e_blkno); 3720 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3721 if (e_cpos) 3722 *e_cpos = le32_to_cpu(rec->e_cpos); 3723 out: 3724 brelse(eb_bh); 3725 return ret; 3726 } 3727 3728 typedef int (xattr_bucket_func)(struct inode *inode, 3729 struct ocfs2_xattr_bucket *bucket, 3730 void *para); 3731 3732 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3733 struct ocfs2_xattr_bucket *bucket, 3734 int name_index, 3735 const char *name, 3736 u32 name_hash, 3737 u16 *xe_index, 3738 int *found) 3739 { 3740 int i, ret = 0, cmp = 1, block_off, new_offset; 3741 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3742 size_t name_len = strlen(name); 3743 struct 
ocfs2_xattr_entry *xe = NULL; 3744 char *xe_name; 3745 3746 /* 3747 * We don't use binary search in the bucket because there 3748 * may be multiple entries with the same name hash. 3749 */ 3750 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3751 xe = &xh->xh_entries[i]; 3752 3753 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3754 continue; 3755 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3756 break; 3757 3758 cmp = name_index - ocfs2_xattr_get_type(xe); 3759 if (!cmp) 3760 cmp = name_len - xe->xe_name_len; 3761 if (cmp) 3762 continue; 3763 3764 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3765 xh, 3766 i, 3767 &block_off, 3768 &new_offset); 3769 if (ret) { 3770 mlog_errno(ret); 3771 break; 3772 } 3773 3774 3775 xe_name = bucket_block(bucket, block_off) + new_offset; 3776 if (!memcmp(name, xe_name, name_len)) { 3777 *xe_index = i; 3778 *found = 1; 3779 ret = 0; 3780 break; 3781 } 3782 } 3783 3784 return ret; 3785 } 3786 3787 /* 3788 * Find the specified xattr entry in a series of buckets. 3789 * This series start from p_blkno and last for num_clusters. 3790 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3791 * the num of the valid buckets. 3792 * 3793 * Return the buffer_head this xattr should reside in. And if the xattr's 3794 * hash is in the gap of 2 buckets, return the lower bucket. 
3795 */ 3796 static int ocfs2_xattr_bucket_find(struct inode *inode, 3797 int name_index, 3798 const char *name, 3799 u32 name_hash, 3800 u64 p_blkno, 3801 u32 first_hash, 3802 u32 num_clusters, 3803 struct ocfs2_xattr_search *xs) 3804 { 3805 int ret, found = 0; 3806 struct ocfs2_xattr_header *xh = NULL; 3807 struct ocfs2_xattr_entry *xe = NULL; 3808 u16 index = 0; 3809 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3810 int low_bucket = 0, bucket, high_bucket; 3811 struct ocfs2_xattr_bucket *search; 3812 u32 last_hash; 3813 u64 blkno, lower_blkno = 0; 3814 3815 search = ocfs2_xattr_bucket_new(inode); 3816 if (!search) { 3817 ret = -ENOMEM; 3818 mlog_errno(ret); 3819 goto out; 3820 } 3821 3822 ret = ocfs2_read_xattr_bucket(search, p_blkno); 3823 if (ret) { 3824 mlog_errno(ret); 3825 goto out; 3826 } 3827 3828 xh = bucket_xh(search); 3829 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 3830 while (low_bucket <= high_bucket) { 3831 ocfs2_xattr_bucket_relse(search); 3832 3833 bucket = (low_bucket + high_bucket) / 2; 3834 blkno = p_blkno + bucket * blk_per_bucket; 3835 ret = ocfs2_read_xattr_bucket(search, blkno); 3836 if (ret) { 3837 mlog_errno(ret); 3838 goto out; 3839 } 3840 3841 xh = bucket_xh(search); 3842 xe = &xh->xh_entries[0]; 3843 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3844 high_bucket = bucket - 1; 3845 continue; 3846 } 3847 3848 /* 3849 * Check whether the hash of the last entry in our 3850 * bucket is larger than the search one. for an empty 3851 * bucket, the last one is also the first one. 3852 */ 3853 if (xh->xh_count) 3854 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; 3855 3856 last_hash = le32_to_cpu(xe->xe_name_hash); 3857 3858 /* record lower_blkno which may be the insert place. */ 3859 lower_blkno = blkno; 3860 3861 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3862 low_bucket = bucket + 1; 3863 continue; 3864 } 3865 3866 /* the searched xattr should reside in this bucket if exists. 
*/ 3867 ret = ocfs2_find_xe_in_bucket(inode, search, 3868 name_index, name, name_hash, 3869 &index, &found); 3870 if (ret) { 3871 mlog_errno(ret); 3872 goto out; 3873 } 3874 break; 3875 } 3876 3877 /* 3878 * Record the bucket we have found. 3879 * When the xattr's hash value is in the gap of 2 buckets, we will 3880 * always set it to the previous bucket. 3881 */ 3882 if (!lower_blkno) 3883 lower_blkno = p_blkno; 3884 3885 /* This should be in cache - we just read it during the search */ 3886 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); 3887 if (ret) { 3888 mlog_errno(ret); 3889 goto out; 3890 } 3891 3892 xs->header = bucket_xh(xs->bucket); 3893 xs->base = bucket_block(xs->bucket, 0); 3894 xs->end = xs->base + inode->i_sb->s_blocksize; 3895 3896 if (found) { 3897 xs->here = &xs->header->xh_entries[index]; 3898 trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno, 3899 name, name_index, name_hash, 3900 (unsigned long long)bucket_blkno(xs->bucket), 3901 index); 3902 } else 3903 ret = -ENODATA; 3904 3905 out: 3906 ocfs2_xattr_bucket_free(search); 3907 return ret; 3908 } 3909 3910 static int ocfs2_xattr_index_block_find(struct inode *inode, 3911 struct buffer_head *root_bh, 3912 int name_index, 3913 const char *name, 3914 struct ocfs2_xattr_search *xs) 3915 { 3916 int ret; 3917 struct ocfs2_xattr_block *xb = 3918 (struct ocfs2_xattr_block *)root_bh->b_data; 3919 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 3920 struct ocfs2_extent_list *el = &xb_root->xt_list; 3921 u64 p_blkno = 0; 3922 u32 first_hash, num_clusters = 0; 3923 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 3924 3925 if (le16_to_cpu(el->l_next_free_rec) == 0) 3926 return -ENODATA; 3927 3928 trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno, 3929 name, name_index, name_hash, 3930 (unsigned long long)root_bh->b_blocknr, 3931 -1); 3932 3933 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3934 &num_clusters, el); 3935 if (ret) { 
3936 mlog_errno(ret); 3937 goto out; 3938 } 3939 3940 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 3941 3942 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno, 3943 name, name_index, first_hash, 3944 (unsigned long long)p_blkno, 3945 num_clusters); 3946 3947 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3948 p_blkno, first_hash, num_clusters, xs); 3949 3950 out: 3951 return ret; 3952 } 3953 3954 static int ocfs2_iterate_xattr_buckets(struct inode *inode, 3955 u64 blkno, 3956 u32 clusters, 3957 xattr_bucket_func *func, 3958 void *para) 3959 { 3960 int i, ret = 0; 3961 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3962 u32 num_buckets = clusters * bpc; 3963 struct ocfs2_xattr_bucket *bucket; 3964 3965 bucket = ocfs2_xattr_bucket_new(inode); 3966 if (!bucket) { 3967 mlog_errno(-ENOMEM); 3968 return -ENOMEM; 3969 } 3970 3971 trace_ocfs2_iterate_xattr_buckets( 3972 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3973 (unsigned long long)blkno, clusters); 3974 3975 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 3976 ret = ocfs2_read_xattr_bucket(bucket, blkno); 3977 if (ret) { 3978 mlog_errno(ret); 3979 break; 3980 } 3981 3982 /* 3983 * The real bucket num in this series of blocks is stored 3984 * in the 1st bucket. 
3985 */ 3986 if (i == 0) 3987 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 3988 3989 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno, 3990 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 3991 if (func) { 3992 ret = func(inode, bucket, para); 3993 if (ret && ret != -ERANGE) 3994 mlog_errno(ret); 3995 /* Fall through to bucket_relse() */ 3996 } 3997 3998 ocfs2_xattr_bucket_relse(bucket); 3999 if (ret) 4000 break; 4001 } 4002 4003 ocfs2_xattr_bucket_free(bucket); 4004 return ret; 4005 } 4006 4007 struct ocfs2_xattr_tree_list { 4008 char *buffer; 4009 size_t buffer_size; 4010 size_t result; 4011 }; 4012 4013 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 4014 struct ocfs2_xattr_header *xh, 4015 int index, 4016 int *block_off, 4017 int *new_offset) 4018 { 4019 u16 name_offset; 4020 4021 if (index < 0 || index >= le16_to_cpu(xh->xh_count)) 4022 return -EINVAL; 4023 4024 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 4025 4026 *block_off = name_offset >> sb->s_blocksize_bits; 4027 *new_offset = name_offset % sb->s_blocksize; 4028 4029 return 0; 4030 } 4031 4032 static int ocfs2_list_xattr_bucket(struct inode *inode, 4033 struct ocfs2_xattr_bucket *bucket, 4034 void *para) 4035 { 4036 int ret = 0, type; 4037 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; 4038 int i, block_off, new_offset; 4039 const char *prefix, *name; 4040 4041 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { 4042 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; 4043 type = ocfs2_xattr_get_type(entry); 4044 prefix = ocfs2_xattr_prefix(type); 4045 4046 if (prefix) { 4047 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 4048 bucket_xh(bucket), 4049 i, 4050 &block_off, 4051 &new_offset); 4052 if (ret) 4053 break; 4054 4055 name = (const char *)bucket_block(bucket, block_off) + 4056 new_offset; 4057 ret = ocfs2_xattr_list_entry(xl->buffer, 4058 xl->buffer_size, 
4059 &xl->result, 4060 prefix, name, 4061 entry->xe_name_len); 4062 if (ret) 4063 break; 4064 } 4065 } 4066 4067 return ret; 4068 } 4069 4070 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 4071 struct buffer_head *blk_bh, 4072 xattr_tree_rec_func *rec_func, 4073 void *para) 4074 { 4075 struct ocfs2_xattr_block *xb = 4076 (struct ocfs2_xattr_block *)blk_bh->b_data; 4077 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; 4078 int ret = 0; 4079 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 4080 u64 p_blkno = 0; 4081 4082 if (!el->l_next_free_rec || !rec_func) 4083 return 0; 4084 4085 while (name_hash > 0) { 4086 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 4087 &e_cpos, &num_clusters, el); 4088 if (ret) { 4089 mlog_errno(ret); 4090 break; 4091 } 4092 4093 ret = rec_func(inode, blk_bh, p_blkno, e_cpos, 4094 num_clusters, para); 4095 if (ret) { 4096 if (ret != -ERANGE) 4097 mlog_errno(ret); 4098 break; 4099 } 4100 4101 if (e_cpos == 0) 4102 break; 4103 4104 name_hash = e_cpos - 1; 4105 } 4106 4107 return ret; 4108 4109 } 4110 4111 static int ocfs2_list_xattr_tree_rec(struct inode *inode, 4112 struct buffer_head *root_bh, 4113 u64 blkno, u32 cpos, u32 len, void *para) 4114 { 4115 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 4116 ocfs2_list_xattr_bucket, para); 4117 } 4118 4119 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 4120 struct buffer_head *blk_bh, 4121 char *buffer, 4122 size_t buffer_size) 4123 { 4124 int ret; 4125 struct ocfs2_xattr_tree_list xl = { 4126 .buffer = buffer, 4127 .buffer_size = buffer_size, 4128 .result = 0, 4129 }; 4130 4131 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 4132 ocfs2_list_xattr_tree_rec, &xl); 4133 if (ret) { 4134 mlog_errno(ret); 4135 goto out; 4136 } 4137 4138 ret = xl.result; 4139 out: 4140 return ret; 4141 } 4142 4143 static int cmp_xe(const void *a, const void *b) 4144 { 4145 const struct ocfs2_xattr_entry *l = a, *r = b; 4146 u32 l_hash = 
le32_to_cpu(l->xe_name_hash); 4147 u32 r_hash = le32_to_cpu(r->xe_name_hash); 4148 4149 if (l_hash > r_hash) 4150 return 1; 4151 if (l_hash < r_hash) 4152 return -1; 4153 return 0; 4154 } 4155 4156 static void swap_xe(void *a, void *b, int size) 4157 { 4158 struct ocfs2_xattr_entry *l = a, *r = b, tmp; 4159 4160 tmp = *l; 4161 memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); 4162 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); 4163 } 4164 4165 /* 4166 * When the ocfs2_xattr_block is filled up, new bucket will be created 4167 * and all the xattr entries will be moved to the new bucket. 4168 * The header goes at the start of the bucket, and the names+values are 4169 * filled from the end. This is why *target starts as the last buffer. 4170 * Note: we need to sort the entries since they are not saved in order 4171 * in the ocfs2_xattr_block. 4172 */ 4173 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 4174 struct buffer_head *xb_bh, 4175 struct ocfs2_xattr_bucket *bucket) 4176 { 4177 int i, blocksize = inode->i_sb->s_blocksize; 4178 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4179 u16 offset, size, off_change; 4180 struct ocfs2_xattr_entry *xe; 4181 struct ocfs2_xattr_block *xb = 4182 (struct ocfs2_xattr_block *)xb_bh->b_data; 4183 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 4184 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 4185 u16 count = le16_to_cpu(xb_xh->xh_count); 4186 char *src = xb_bh->b_data; 4187 char *target = bucket_block(bucket, blks - 1); 4188 4189 trace_ocfs2_cp_xattr_block_to_bucket_begin( 4190 (unsigned long long)xb_bh->b_blocknr, 4191 (unsigned long long)bucket_blkno(bucket)); 4192 4193 for (i = 0; i < blks; i++) 4194 memset(bucket_block(bucket, i), 0, blocksize); 4195 4196 /* 4197 * Since the xe_name_offset is based on ocfs2_xattr_header, 4198 * there is a offset change corresponding to the change of 4199 * ocfs2_xattr_header's position. 
4200 */ 4201 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4202 xe = &xb_xh->xh_entries[count - 1]; 4203 offset = le16_to_cpu(xe->xe_name_offset) + off_change; 4204 size = blocksize - offset; 4205 4206 /* copy all the names and values. */ 4207 memcpy(target + offset, src + offset, size); 4208 4209 /* Init new header now. */ 4210 xh->xh_count = xb_xh->xh_count; 4211 xh->xh_num_buckets = cpu_to_le16(1); 4212 xh->xh_name_value_len = cpu_to_le16(size); 4213 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 4214 4215 /* copy all the entries. */ 4216 target = bucket_block(bucket, 0); 4217 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 4218 size = count * sizeof(struct ocfs2_xattr_entry); 4219 memcpy(target + offset, (char *)xb_xh + offset, size); 4220 4221 /* Change the xe offset for all the xe because of the move. */ 4222 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + 4223 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4224 for (i = 0; i < count; i++) 4225 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4226 4227 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); 4228 4229 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4230 cmp_xe, swap_xe); 4231 } 4232 4233 /* 4234 * After we move xattr from block to index btree, we have to 4235 * update ocfs2_xattr_search to the new xe and base. 4236 * 4237 * When the entry is in xattr block, xattr_bh indicates the storage place. 4238 * While if the entry is in index b-tree, "bucket" indicates the 4239 * real place of the xattr. 
4240 */ 4241 static void ocfs2_xattr_update_xattr_search(struct inode *inode, 4242 struct ocfs2_xattr_search *xs, 4243 struct buffer_head *old_bh) 4244 { 4245 char *buf = old_bh->b_data; 4246 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 4247 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 4248 int i; 4249 4250 xs->header = bucket_xh(xs->bucket); 4251 xs->base = bucket_block(xs->bucket, 0); 4252 xs->end = xs->base + inode->i_sb->s_blocksize; 4253 4254 if (xs->not_found) 4255 return; 4256 4257 i = xs->here - old_xh->xh_entries; 4258 xs->here = &xs->header->xh_entries[i]; 4259 } 4260 4261 static int ocfs2_xattr_create_index_block(struct inode *inode, 4262 struct ocfs2_xattr_search *xs, 4263 struct ocfs2_xattr_set_ctxt *ctxt) 4264 { 4265 int ret; 4266 u32 bit_off, len; 4267 u64 blkno; 4268 handle_t *handle = ctxt->handle; 4269 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4270 struct buffer_head *xb_bh = xs->xattr_bh; 4271 struct ocfs2_xattr_block *xb = 4272 (struct ocfs2_xattr_block *)xb_bh->b_data; 4273 struct ocfs2_xattr_tree_root *xr; 4274 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4275 4276 trace_ocfs2_xattr_create_index_block_begin( 4277 (unsigned long long)xb_bh->b_blocknr); 4278 4279 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4280 BUG_ON(!xs->bucket); 4281 4282 /* 4283 * XXX: 4284 * We can use this lock for now, and maybe move to a dedicated mutex 4285 * if performance becomes a problem later. 
4286 */ 4287 down_write(&oi->ip_alloc_sem); 4288 4289 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, 4290 OCFS2_JOURNAL_ACCESS_WRITE); 4291 if (ret) { 4292 mlog_errno(ret); 4293 goto out; 4294 } 4295 4296 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 4297 1, 1, &bit_off, &len); 4298 if (ret) { 4299 mlog_errno(ret); 4300 goto out; 4301 } 4302 4303 /* 4304 * The bucket may spread in many blocks, and 4305 * we will only touch the 1st block and the last block 4306 * in the whole bucket(one for entry and one for data). 4307 */ 4308 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4309 4310 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); 4311 4312 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1); 4313 if (ret) { 4314 mlog_errno(ret); 4315 goto out; 4316 } 4317 4318 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 4319 OCFS2_JOURNAL_ACCESS_CREATE); 4320 if (ret) { 4321 mlog_errno(ret); 4322 goto out; 4323 } 4324 4325 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); 4326 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 4327 4328 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); 4329 4330 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 4331 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 4332 offsetof(struct ocfs2_xattr_block, xb_attrs)); 4333 4334 xr = &xb->xb_attrs.xb_root; 4335 xr->xt_clusters = cpu_to_le32(1); 4336 xr->xt_last_eb_blk = 0; 4337 xr->xt_list.l_tree_depth = 0; 4338 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); 4339 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 4340 4341 xr->xt_list.l_recs[0].e_cpos = 0; 4342 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 4343 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 4344 4345 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 4346 4347 ocfs2_journal_dirty(handle, xb_bh); 4348 4349 out: 4350 up_write(&oi->ip_alloc_sem); 4351 4352 return ret; 4353 } 4354 4355 static int 
cmp_xe_offset(const void *a, const void *b) 4356 { 4357 const struct ocfs2_xattr_entry *l = a, *r = b; 4358 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4359 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4360 4361 if (l_name_offset < r_name_offset) 4362 return 1; 4363 if (l_name_offset > r_name_offset) 4364 return -1; 4365 return 0; 4366 } 4367 4368 /* 4369 * defrag a xattr bucket if we find that the bucket has some 4370 * holes beteen name/value pairs. 4371 * We will move all the name/value pairs to the end of the bucket 4372 * so that we can spare some space for insertion. 4373 */ 4374 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4375 handle_t *handle, 4376 struct ocfs2_xattr_bucket *bucket) 4377 { 4378 int ret, i; 4379 size_t end, offset, len; 4380 struct ocfs2_xattr_header *xh; 4381 char *entries, *buf, *bucket_buf = NULL; 4382 u64 blkno = bucket_blkno(bucket); 4383 u16 xh_free_start; 4384 size_t blocksize = inode->i_sb->s_blocksize; 4385 struct ocfs2_xattr_entry *xe; 4386 4387 /* 4388 * In order to make the operation more efficient and generic, 4389 * we copy all the blocks into a contiguous memory and do the 4390 * defragment there, so if anything is error, we will not touch 4391 * the real block. 
4392 */ 4393 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4394 if (!bucket_buf) { 4395 ret = -EIO; 4396 goto out; 4397 } 4398 4399 buf = bucket_buf; 4400 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4401 memcpy(buf, bucket_block(bucket, i), blocksize); 4402 4403 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4404 OCFS2_JOURNAL_ACCESS_WRITE); 4405 if (ret < 0) { 4406 mlog_errno(ret); 4407 goto out; 4408 } 4409 4410 xh = (struct ocfs2_xattr_header *)bucket_buf; 4411 entries = (char *)xh->xh_entries; 4412 xh_free_start = le16_to_cpu(xh->xh_free_start); 4413 4414 trace_ocfs2_defrag_xattr_bucket( 4415 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4416 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4417 4418 /* 4419 * sort all the entries by their offset. 4420 * the largest will be the first, so that we can 4421 * move them to the end one by one. 4422 */ 4423 sort(entries, le16_to_cpu(xh->xh_count), 4424 sizeof(struct ocfs2_xattr_entry), 4425 cmp_xe_offset, swap_xe); 4426 4427 /* Move all name/values to the end of the bucket. */ 4428 xe = xh->xh_entries; 4429 end = OCFS2_XATTR_BUCKET_SIZE; 4430 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4431 offset = le16_to_cpu(xe->xe_name_offset); 4432 len = namevalue_size_xe(xe); 4433 4434 /* 4435 * We must make sure that the name/value pair 4436 * exist in the same block. So adjust end to 4437 * the previous block end if needed. 
4438 */ 4439 if (((end - len) / blocksize != 4440 (end - 1) / blocksize)) 4441 end = end - end % blocksize; 4442 4443 if (end > offset + len) { 4444 memmove(bucket_buf + end - len, 4445 bucket_buf + offset, len); 4446 xe->xe_name_offset = cpu_to_le16(end - len); 4447 } 4448 4449 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4450 "bucket %llu\n", (unsigned long long)blkno); 4451 4452 end -= len; 4453 } 4454 4455 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4456 "bucket %llu\n", (unsigned long long)blkno); 4457 4458 if (xh_free_start == end) 4459 goto out; 4460 4461 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4462 xh->xh_free_start = cpu_to_le16(end); 4463 4464 /* sort the entries by their name_hash. */ 4465 sort(entries, le16_to_cpu(xh->xh_count), 4466 sizeof(struct ocfs2_xattr_entry), 4467 cmp_xe, swap_xe); 4468 4469 buf = bucket_buf; 4470 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4471 memcpy(bucket_block(bucket, i), buf, blocksize); 4472 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4473 4474 out: 4475 kfree(bucket_buf); 4476 return ret; 4477 } 4478 4479 /* 4480 * prev_blkno points to the start of an existing extent. new_blkno 4481 * points to a newly allocated extent. Because we know each of our 4482 * clusters contains more than bucket, we can easily split one cluster 4483 * at a bucket boundary. So we take the last cluster of the existing 4484 * extent and split it down the middle. We move the last half of the 4485 * buckets in the last cluster of the existing extent over to the new 4486 * extent. 4487 * 4488 * first_bh is the buffer at prev_blkno so we can update the existing 4489 * extent's bucket count. header_bh is the bucket were we were hoping 4490 * to insert our xattr. If the bucket move places the target in the new 4491 * extent, we'll update first_bh and header_bh after modifying the old 4492 * extent. 
4493 * 4494 * first_hash will be set as the 1st xe's name_hash in the new extent. 4495 */ 4496 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 4497 handle_t *handle, 4498 struct ocfs2_xattr_bucket *first, 4499 struct ocfs2_xattr_bucket *target, 4500 u64 new_blkno, 4501 u32 num_clusters, 4502 u32 *first_hash) 4503 { 4504 int ret; 4505 struct super_block *sb = inode->i_sb; 4506 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); 4507 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); 4508 int to_move = num_buckets / 2; 4509 u64 src_blkno; 4510 u64 last_cluster_blkno = bucket_blkno(first) + 4511 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); 4512 4513 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4514 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4515 4516 trace_ocfs2_mv_xattr_bucket_cross_cluster( 4517 (unsigned long long)last_cluster_blkno, 4518 (unsigned long long)new_blkno); 4519 4520 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4521 last_cluster_blkno, new_blkno, 4522 to_move, first_hash); 4523 if (ret) { 4524 mlog_errno(ret); 4525 goto out; 4526 } 4527 4528 /* This is the first bucket that got moved */ 4529 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); 4530 4531 /* 4532 * If the target bucket was part of the moved buckets, we need to 4533 * update first and target. 4534 */ 4535 if (bucket_blkno(target) >= src_blkno) { 4536 /* Find the block for the new target bucket */ 4537 src_blkno = new_blkno + 4538 (bucket_blkno(target) - src_blkno); 4539 4540 ocfs2_xattr_bucket_relse(first); 4541 ocfs2_xattr_bucket_relse(target); 4542 4543 /* 4544 * These shouldn't fail - the buffers are in the 4545 * journal from ocfs2_cp_xattr_bucket(). 
4546 */ 4547 ret = ocfs2_read_xattr_bucket(first, new_blkno); 4548 if (ret) { 4549 mlog_errno(ret); 4550 goto out; 4551 } 4552 ret = ocfs2_read_xattr_bucket(target, src_blkno); 4553 if (ret) 4554 mlog_errno(ret); 4555 4556 } 4557 4558 out: 4559 return ret; 4560 } 4561 4562 /* 4563 * Find the suitable pos when we divide a bucket into 2. 4564 * We have to make sure the xattrs with the same hash value exist 4565 * in the same bucket. 4566 * 4567 * If this ocfs2_xattr_header covers more than one hash value, find a 4568 * place where the hash value changes. Try to find the most even split. 4569 * The most common case is that all entries have different hash values, 4570 * and the first check we make will find a place to split. 4571 */ 4572 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) 4573 { 4574 struct ocfs2_xattr_entry *entries = xh->xh_entries; 4575 int count = le16_to_cpu(xh->xh_count); 4576 int delta, middle = count / 2; 4577 4578 /* 4579 * We start at the middle. Each step gets farther away in both 4580 * directions. We therefore hit the change in hash value 4581 * nearest to the middle. Note that this loop does not execute for 4582 * count < 2. 4583 */ 4584 for (delta = 0; delta < middle; delta++) { 4585 /* Let's check delta earlier than middle */ 4586 if (cmp_xe(&entries[middle - delta - 1], 4587 &entries[middle - delta])) 4588 return middle - delta; 4589 4590 /* For even counts, don't walk off the end */ 4591 if ((middle + delta + 1) == count) 4592 continue; 4593 4594 /* Now try delta past middle */ 4595 if (cmp_xe(&entries[middle + delta], 4596 &entries[middle + delta + 1])) 4597 return middle + delta + 1; 4598 } 4599 4600 /* Every entry had the same hash */ 4601 return count; 4602 } 4603 4604 /* 4605 * Move some xattrs in old bucket(blk) to new bucket(new_blk). 4606 * first_hash will record the 1st hash of the new bucket. 4607 * 4608 * Normally half of the xattrs will be moved. 
But we have to make 4609 * sure that the xattrs with the same hash value are stored in the 4610 * same bucket. If all the xattrs in this bucket have the same hash 4611 * value, the new bucket will be initialized as an empty one and the 4612 * first_hash will be initialized as (hash_value+1). 4613 */ 4614 static int ocfs2_divide_xattr_bucket(struct inode *inode, 4615 handle_t *handle, 4616 u64 blk, 4617 u64 new_blk, 4618 u32 *first_hash, 4619 int new_bucket_head) 4620 { 4621 int ret, i; 4622 int count, start, len, name_value_len = 0, name_offset = 0; 4623 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4624 struct ocfs2_xattr_header *xh; 4625 struct ocfs2_xattr_entry *xe; 4626 int blocksize = inode->i_sb->s_blocksize; 4627 4628 trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk, 4629 (unsigned long long)new_blk); 4630 4631 s_bucket = ocfs2_xattr_bucket_new(inode); 4632 t_bucket = ocfs2_xattr_bucket_new(inode); 4633 if (!s_bucket || !t_bucket) { 4634 ret = -ENOMEM; 4635 mlog_errno(ret); 4636 goto out; 4637 } 4638 4639 ret = ocfs2_read_xattr_bucket(s_bucket, blk); 4640 if (ret) { 4641 mlog_errno(ret); 4642 goto out; 4643 } 4644 4645 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, 4646 OCFS2_JOURNAL_ACCESS_WRITE); 4647 if (ret) { 4648 mlog_errno(ret); 4649 goto out; 4650 } 4651 4652 /* 4653 * Even if !new_bucket_head, we're overwriting t_bucket. Thus, 4654 * there's no need to read it. 4655 */ 4656 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head); 4657 if (ret) { 4658 mlog_errno(ret); 4659 goto out; 4660 } 4661 4662 /* 4663 * Hey, if we're overwriting t_bucket, what difference does 4664 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the 4665 * same part of ocfs2_cp_xattr_bucket(). 4666 */ 4667 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4668 new_bucket_head ? 
4669 OCFS2_JOURNAL_ACCESS_CREATE : 4670 OCFS2_JOURNAL_ACCESS_WRITE); 4671 if (ret) { 4672 mlog_errno(ret); 4673 goto out; 4674 } 4675 4676 xh = bucket_xh(s_bucket); 4677 count = le16_to_cpu(xh->xh_count); 4678 start = ocfs2_xattr_find_divide_pos(xh); 4679 4680 if (start == count) { 4681 xe = &xh->xh_entries[start-1]; 4682 4683 /* 4684 * initialized a new empty bucket here. 4685 * The hash value is set as one larger than 4686 * that of the last entry in the previous bucket. 4687 */ 4688 for (i = 0; i < t_bucket->bu_blocks; i++) 4689 memset(bucket_block(t_bucket, i), 0, blocksize); 4690 4691 xh = bucket_xh(t_bucket); 4692 xh->xh_free_start = cpu_to_le16(blocksize); 4693 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 4694 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 4695 4696 goto set_num_buckets; 4697 } 4698 4699 /* copy the whole bucket to the new first. */ 4700 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4701 4702 /* update the new bucket. */ 4703 xh = bucket_xh(t_bucket); 4704 4705 /* 4706 * Calculate the total name/value len and xh_free_start for 4707 * the old bucket first. 4708 */ 4709 name_offset = OCFS2_XATTR_BUCKET_SIZE; 4710 name_value_len = 0; 4711 for (i = 0; i < start; i++) { 4712 xe = &xh->xh_entries[i]; 4713 name_value_len += namevalue_size_xe(xe); 4714 if (le16_to_cpu(xe->xe_name_offset) < name_offset) 4715 name_offset = le16_to_cpu(xe->xe_name_offset); 4716 } 4717 4718 /* 4719 * Now begin the modification to the new bucket. 4720 * 4721 * In the new bucket, We just move the xattr entry to the beginning 4722 * and don't touch the name/value. So there will be some holes in the 4723 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is 4724 * called. 
4725 */ 4726 xe = &xh->xh_entries[start]; 4727 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4728 trace_ocfs2_divide_xattr_bucket_move(len, 4729 (int)((char *)xe - (char *)xh), 4730 (int)((char *)xh->xh_entries - (char *)xh)); 4731 memmove((char *)xh->xh_entries, (char *)xe, len); 4732 xe = &xh->xh_entries[count - start]; 4733 len = sizeof(struct ocfs2_xattr_entry) * start; 4734 memset((char *)xe, 0, len); 4735 4736 le16_add_cpu(&xh->xh_count, -start); 4737 le16_add_cpu(&xh->xh_name_value_len, -name_value_len); 4738 4739 /* Calculate xh_free_start for the new bucket. */ 4740 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 4741 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 4742 xe = &xh->xh_entries[i]; 4743 if (le16_to_cpu(xe->xe_name_offset) < 4744 le16_to_cpu(xh->xh_free_start)) 4745 xh->xh_free_start = xe->xe_name_offset; 4746 } 4747 4748 set_num_buckets: 4749 /* set xh->xh_num_buckets for the new xh. */ 4750 if (new_bucket_head) 4751 xh->xh_num_buckets = cpu_to_le16(1); 4752 else 4753 xh->xh_num_buckets = 0; 4754 4755 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4756 4757 /* store the first_hash of the new bucket. */ 4758 if (first_hash) 4759 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4760 4761 /* 4762 * Now only update the 1st block of the old bucket. If we 4763 * just added a new empty bucket, there is no need to modify 4764 * it. 4765 */ 4766 if (start == count) 4767 goto out; 4768 4769 xh = bucket_xh(s_bucket); 4770 memset(&xh->xh_entries[start], 0, 4771 sizeof(struct ocfs2_xattr_entry) * (count - start)); 4772 xh->xh_count = cpu_to_le16(start); 4773 xh->xh_free_start = cpu_to_le16(name_offset); 4774 xh->xh_name_value_len = cpu_to_le16(name_value_len); 4775 4776 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); 4777 4778 out: 4779 ocfs2_xattr_bucket_free(s_bucket); 4780 ocfs2_xattr_bucket_free(t_bucket); 4781 4782 return ret; 4783 } 4784 4785 /* 4786 * Copy xattr from one bucket to another bucket. 
4787 * 4788 * The caller must make sure that the journal transaction 4789 * has enough space for journaling. 4790 */ 4791 static int ocfs2_cp_xattr_bucket(struct inode *inode, 4792 handle_t *handle, 4793 u64 s_blkno, 4794 u64 t_blkno, 4795 int t_is_new) 4796 { 4797 int ret; 4798 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4799 4800 BUG_ON(s_blkno == t_blkno); 4801 4802 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno, 4803 (unsigned long long)t_blkno, 4804 t_is_new); 4805 4806 s_bucket = ocfs2_xattr_bucket_new(inode); 4807 t_bucket = ocfs2_xattr_bucket_new(inode); 4808 if (!s_bucket || !t_bucket) { 4809 ret = -ENOMEM; 4810 mlog_errno(ret); 4811 goto out; 4812 } 4813 4814 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); 4815 if (ret) 4816 goto out; 4817 4818 /* 4819 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4820 * there's no need to read it. 4821 */ 4822 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new); 4823 if (ret) 4824 goto out; 4825 4826 /* 4827 * Hey, if we're overwriting t_bucket, what difference does 4828 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new 4829 * cluster to fill, we came here from 4830 * ocfs2_mv_xattr_buckets(), and it is really new - 4831 * ACCESS_CREATE is required. But we also might have moved data 4832 * out of t_bucket before extending back into it. 4833 * ocfs2_add_new_xattr_bucket() can do this - its call to 4834 * ocfs2_add_new_xattr_cluster() may have created a new extent 4835 * and copied out the end of the old extent. Then it re-extends 4836 * the old extent back to create space for new xattrs. That's 4837 * how we get here, and the bucket isn't really new. 4838 */ 4839 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4840 t_is_new ? 
4841 OCFS2_JOURNAL_ACCESS_CREATE : 4842 OCFS2_JOURNAL_ACCESS_WRITE); 4843 if (ret) 4844 goto out; 4845 4846 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4847 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4848 4849 out: 4850 ocfs2_xattr_bucket_free(t_bucket); 4851 ocfs2_xattr_bucket_free(s_bucket); 4852 4853 return ret; 4854 } 4855 4856 /* 4857 * src_blk points to the start of an existing extent. last_blk points to 4858 * last cluster in that extent. to_blk points to a newly allocated 4859 * extent. We copy the buckets from the cluster at last_blk to the new 4860 * extent. If start_bucket is non-zero, we skip that many buckets before 4861 * we start copying. The new extent's xh_num_buckets gets set to the 4862 * number of buckets we copied. The old extent's xh_num_buckets shrinks 4863 * by the same amount. 4864 */ 4865 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 4866 u64 src_blk, u64 last_blk, u64 to_blk, 4867 unsigned int start_bucket, 4868 u32 *first_hash) 4869 { 4870 int i, ret, credits; 4871 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4872 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4873 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4874 struct ocfs2_xattr_bucket *old_first, *new_first; 4875 4876 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk, 4877 (unsigned long long)to_blk); 4878 4879 BUG_ON(start_bucket >= num_buckets); 4880 if (start_bucket) { 4881 num_buckets -= start_bucket; 4882 last_blk += (start_bucket * blks_per_bucket); 4883 } 4884 4885 /* The first bucket of the original extent */ 4886 old_first = ocfs2_xattr_bucket_new(inode); 4887 /* The first bucket of the new extent */ 4888 new_first = ocfs2_xattr_bucket_new(inode); 4889 if (!old_first || !new_first) { 4890 ret = -ENOMEM; 4891 mlog_errno(ret); 4892 goto out; 4893 } 4894 4895 ret = ocfs2_read_xattr_bucket(old_first, src_blk); 4896 if (ret) { 4897 mlog_errno(ret); 4898 goto out; 4899 } 4900 4901 /* 4902 * We 
need to update the first bucket of the old extent and all 4903 * the buckets going to the new extent. 4904 */ 4905 credits = ((num_buckets + 1) * blks_per_bucket); 4906 ret = ocfs2_extend_trans(handle, credits); 4907 if (ret) { 4908 mlog_errno(ret); 4909 goto out; 4910 } 4911 4912 ret = ocfs2_xattr_bucket_journal_access(handle, old_first, 4913 OCFS2_JOURNAL_ACCESS_WRITE); 4914 if (ret) { 4915 mlog_errno(ret); 4916 goto out; 4917 } 4918 4919 for (i = 0; i < num_buckets; i++) { 4920 ret = ocfs2_cp_xattr_bucket(inode, handle, 4921 last_blk + (i * blks_per_bucket), 4922 to_blk + (i * blks_per_bucket), 4923 1); 4924 if (ret) { 4925 mlog_errno(ret); 4926 goto out; 4927 } 4928 } 4929 4930 /* 4931 * Get the new bucket ready before we dirty anything 4932 * (This actually shouldn't fail, because we already dirtied 4933 * it once in ocfs2_cp_xattr_bucket()). 4934 */ 4935 ret = ocfs2_read_xattr_bucket(new_first, to_blk); 4936 if (ret) { 4937 mlog_errno(ret); 4938 goto out; 4939 } 4940 ret = ocfs2_xattr_bucket_journal_access(handle, new_first, 4941 OCFS2_JOURNAL_ACCESS_WRITE); 4942 if (ret) { 4943 mlog_errno(ret); 4944 goto out; 4945 } 4946 4947 /* Now update the headers */ 4948 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); 4949 ocfs2_xattr_bucket_journal_dirty(handle, old_first); 4950 4951 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); 4952 ocfs2_xattr_bucket_journal_dirty(handle, new_first); 4953 4954 if (first_hash) 4955 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); 4956 4957 out: 4958 ocfs2_xattr_bucket_free(new_first); 4959 ocfs2_xattr_bucket_free(old_first); 4960 return ret; 4961 } 4962 4963 /* 4964 * Move some xattrs in this cluster to the new cluster. 4965 * This function should only be called when bucket size == cluster size. 4966 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 
4967 */ 4968 static int ocfs2_divide_xattr_cluster(struct inode *inode, 4969 handle_t *handle, 4970 u64 prev_blk, 4971 u64 new_blk, 4972 u32 *first_hash) 4973 { 4974 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4975 int ret, credits = 2 * blk_per_bucket; 4976 4977 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4978 4979 ret = ocfs2_extend_trans(handle, credits); 4980 if (ret) { 4981 mlog_errno(ret); 4982 return ret; 4983 } 4984 4985 /* Move half of the xattr in start_blk to the next bucket. */ 4986 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, 4987 new_blk, first_hash, 1); 4988 } 4989 4990 /* 4991 * Move some xattrs from the old cluster to the new one since they are not 4992 * contiguous in ocfs2 xattr tree. 4993 * 4994 * new_blk starts a new separate cluster, and we will move some xattrs from 4995 * prev_blk to it. v_start will be set as the first name hash value in this 4996 * new cluster so that it can be used as e_cpos during tree insertion and 4997 * don't collide with our original b-tree operations. first_bh and header_bh 4998 * will also be updated since they will be used in ocfs2_extend_xattr_bucket 4999 * to extend the insert bucket. 5000 * 5001 * The problem is how much xattr should we move to the new one and when should 5002 * we update first_bh and header_bh? 5003 * 1. If cluster size > bucket size, that means the previous cluster has more 5004 * than 1 bucket, so just move half nums of bucket into the new cluster and 5005 * update the first_bh and header_bh if the insert bucket has been moved 5006 * to the new cluster. 5007 * 2. If cluster_size == bucket_size: 5008 * a) If the previous extent rec has more than one cluster and the insert 5009 * place isn't in the last cluster, copy the entire last cluster to the 5010 * new one. This time, we don't need to upate the first_bh and header_bh 5011 * since they will not be moved into the new cluster. 
5012 * b) Otherwise, move the bottom half of the xattrs in the last cluster into 5013 * the new one. And we set the extend flag to zero if the insert place is 5014 * moved into the new allocated cluster since no extend is needed. 5015 */ 5016 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 5017 handle_t *handle, 5018 struct ocfs2_xattr_bucket *first, 5019 struct ocfs2_xattr_bucket *target, 5020 u64 new_blk, 5021 u32 prev_clusters, 5022 u32 *v_start, 5023 int *extend) 5024 { 5025 int ret; 5026 5027 trace_ocfs2_adjust_xattr_cross_cluster( 5028 (unsigned long long)bucket_blkno(first), 5029 (unsigned long long)new_blk, prev_clusters); 5030 5031 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5032 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5033 handle, 5034 first, target, 5035 new_blk, 5036 prev_clusters, 5037 v_start); 5038 if (ret) 5039 mlog_errno(ret); 5040 } else { 5041 /* The start of the last cluster in the first extent */ 5042 u64 last_blk = bucket_blkno(first) + 5043 ((prev_clusters - 1) * 5044 ocfs2_clusters_to_blocks(inode->i_sb, 1)); 5045 5046 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { 5047 ret = ocfs2_mv_xattr_buckets(inode, handle, 5048 bucket_blkno(first), 5049 last_blk, new_blk, 0, 5050 v_start); 5051 if (ret) 5052 mlog_errno(ret); 5053 } else { 5054 ret = ocfs2_divide_xattr_cluster(inode, handle, 5055 last_blk, new_blk, 5056 v_start); 5057 if (ret) 5058 mlog_errno(ret); 5059 5060 if ((bucket_blkno(target) == last_blk) && extend) 5061 *extend = 0; 5062 } 5063 } 5064 5065 return ret; 5066 } 5067 5068 /* 5069 * Add a new cluster for xattr storage. 5070 * 5071 * If the new cluster is contiguous with the previous one, it will be 5072 * appended to the same extent record, and num_clusters will be updated. 5073 * If not, we will insert a new extent for it and move some xattrs in 5074 * the last cluster into the new allocated one. 
5075 * We also need to limit the maximum size of a btree leaf, otherwise we'll 5076 * lose the benefits of hashing because we'll have to search large leaves. 5077 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, 5078 * if it's bigger). 5079 * 5080 * first_bh is the first block of the previous extent rec and header_bh 5081 * indicates the bucket we will insert the new xattrs. They will be updated 5082 * when the header_bh is moved into the new cluster. 5083 */ 5084 static int ocfs2_add_new_xattr_cluster(struct inode *inode, 5085 struct buffer_head *root_bh, 5086 struct ocfs2_xattr_bucket *first, 5087 struct ocfs2_xattr_bucket *target, 5088 u32 *num_clusters, 5089 u32 prev_cpos, 5090 int *extend, 5091 struct ocfs2_xattr_set_ctxt *ctxt) 5092 { 5093 int ret; 5094 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 5095 u32 prev_clusters = *num_clusters; 5096 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 5097 u64 block; 5098 handle_t *handle = ctxt->handle; 5099 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5100 struct ocfs2_extent_tree et; 5101 5102 trace_ocfs2_add_new_xattr_cluster_begin( 5103 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5104 (unsigned long long)bucket_blkno(first), 5105 prev_cpos, prev_clusters); 5106 5107 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5108 5109 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5110 OCFS2_JOURNAL_ACCESS_WRITE); 5111 if (ret < 0) { 5112 mlog_errno(ret); 5113 goto leave; 5114 } 5115 5116 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1, 5117 clusters_to_add, &bit_off, &num_bits); 5118 if (ret < 0) { 5119 if (ret != -ENOSPC) 5120 mlog_errno(ret); 5121 goto leave; 5122 } 5123 5124 BUG_ON(num_bits > clusters_to_add); 5125 5126 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5127 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits); 5128 5129 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5130 
(prev_clusters + num_bits) << osb->s_clustersize_bits <= 5131 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 5132 /* 5133 * If this cluster is contiguous with the old one and 5134 * adding this new cluster, we don't surpass the limit of 5135 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be 5136 * initialized and used like other buckets in the previous 5137 * cluster. 5138 * So add it as a contiguous one. The caller will handle 5139 * its init process. 5140 */ 5141 v_start = prev_cpos + prev_clusters; 5142 *num_clusters = prev_clusters + num_bits; 5143 } else { 5144 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5145 handle, 5146 first, 5147 target, 5148 block, 5149 prev_clusters, 5150 &v_start, 5151 extend); 5152 if (ret) { 5153 mlog_errno(ret); 5154 goto leave; 5155 } 5156 } 5157 5158 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block, 5159 v_start, num_bits); 5160 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5161 num_bits, 0, ctxt->meta_ac); 5162 if (ret < 0) { 5163 mlog_errno(ret); 5164 goto leave; 5165 } 5166 5167 ocfs2_journal_dirty(handle, root_bh); 5168 5169 leave: 5170 return ret; 5171 } 5172 5173 /* 5174 * We are given an extent. 'first' is the bucket at the very front of 5175 * the extent. The extent has space for an additional bucket past 5176 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number 5177 * of the target bucket. We wish to shift every bucket past the target 5178 * down one, filling in that additional space. When we get back to the 5179 * target, we split the target between itself and the now-empty bucket 5180 * at target+1 (aka, target_blkno + blks_per_bucket). 
5181 */ 5182 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5183 handle_t *handle, 5184 struct ocfs2_xattr_bucket *first, 5185 u64 target_blk, 5186 u32 num_clusters) 5187 { 5188 int ret, credits; 5189 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5190 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5191 u64 end_blk; 5192 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5193 5194 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk, 5195 (unsigned long long)bucket_blkno(first), 5196 num_clusters, new_bucket); 5197 5198 /* The extent must have room for an additional bucket */ 5199 BUG_ON(new_bucket >= 5200 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5201 5202 /* end_blk points to the last existing bucket */ 5203 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5204 5205 /* 5206 * end_blk is the start of the last existing bucket. 5207 * Thus, (end_blk - target_blk) covers the target bucket and 5208 * every bucket after it up to, but not including, the last 5209 * existing bucket. Then we add the last existing bucket, the 5210 * new bucket, and the first bucket (3 * blk_per_bucket). 5211 */ 5212 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5213 ret = ocfs2_extend_trans(handle, credits); 5214 if (ret) { 5215 mlog_errno(ret); 5216 goto out; 5217 } 5218 5219 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5220 OCFS2_JOURNAL_ACCESS_WRITE); 5221 if (ret) { 5222 mlog_errno(ret); 5223 goto out; 5224 } 5225 5226 while (end_blk != target_blk) { 5227 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5228 end_blk + blk_per_bucket, 0); 5229 if (ret) 5230 goto out; 5231 end_blk -= blk_per_bucket; 5232 } 5233 5234 /* Move half of the xattr in target_blkno to the next bucket. 
*/ 5235 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5236 target_blk + blk_per_bucket, NULL, 0); 5237 5238 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5239 ocfs2_xattr_bucket_journal_dirty(handle, first); 5240 5241 out: 5242 return ret; 5243 } 5244 5245 /* 5246 * Add new xattr bucket in an extent record and adjust the buckets 5247 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5248 * bucket we want to insert into. 5249 * 5250 * In the easy case, we will move all the buckets after target down by 5251 * one. Half of target's xattrs will be moved to the next bucket. 5252 * 5253 * If current cluster is full, we'll allocate a new one. This may not 5254 * be contiguous. The underlying calls will make sure that there is 5255 * space for the insert, shifting buckets around if necessary. 5256 * 'target' may be moved by those calls. 5257 */ 5258 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5259 struct buffer_head *xb_bh, 5260 struct ocfs2_xattr_bucket *target, 5261 struct ocfs2_xattr_set_ctxt *ctxt) 5262 { 5263 struct ocfs2_xattr_block *xb = 5264 (struct ocfs2_xattr_block *)xb_bh->b_data; 5265 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5266 struct ocfs2_extent_list *el = &xb_root->xt_list; 5267 u32 name_hash = 5268 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5269 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5270 int ret, num_buckets, extend = 1; 5271 u64 p_blkno; 5272 u32 e_cpos, num_clusters; 5273 /* The bucket at the front of the extent */ 5274 struct ocfs2_xattr_bucket *first; 5275 5276 trace_ocfs2_add_new_xattr_bucket( 5277 (unsigned long long)bucket_blkno(target)); 5278 5279 /* The first bucket of the original extent */ 5280 first = ocfs2_xattr_bucket_new(inode); 5281 if (!first) { 5282 ret = -ENOMEM; 5283 mlog_errno(ret); 5284 goto out; 5285 } 5286 5287 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5288 &num_clusters, el); 5289 if (ret) { 5290 mlog_errno(ret); 
5291 goto out; 5292 } 5293 5294 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5295 if (ret) { 5296 mlog_errno(ret); 5297 goto out; 5298 } 5299 5300 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5301 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5302 /* 5303 * This can move first+target if the target bucket moves 5304 * to the new extent. 5305 */ 5306 ret = ocfs2_add_new_xattr_cluster(inode, 5307 xb_bh, 5308 first, 5309 target, 5310 &num_clusters, 5311 e_cpos, 5312 &extend, 5313 ctxt); 5314 if (ret) { 5315 mlog_errno(ret); 5316 goto out; 5317 } 5318 } 5319 5320 if (extend) { 5321 ret = ocfs2_extend_xattr_bucket(inode, 5322 ctxt->handle, 5323 first, 5324 bucket_blkno(target), 5325 num_clusters); 5326 if (ret) 5327 mlog_errno(ret); 5328 } 5329 5330 out: 5331 ocfs2_xattr_bucket_free(first); 5332 5333 return ret; 5334 } 5335 5336 /* 5337 * Truncate the specified xe_off entry in xattr bucket. 5338 * bucket is indicated by header_bh and len is the new length. 5339 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5340 * 5341 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 5342 */ 5343 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5344 struct ocfs2_xattr_bucket *bucket, 5345 int xe_off, 5346 int len, 5347 struct ocfs2_xattr_set_ctxt *ctxt) 5348 { 5349 int ret, offset; 5350 u64 value_blk; 5351 struct ocfs2_xattr_entry *xe; 5352 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5353 size_t blocksize = inode->i_sb->s_blocksize; 5354 struct ocfs2_xattr_value_buf vb = { 5355 .vb_access = ocfs2_journal_access, 5356 }; 5357 5358 xe = &xh->xh_entries[xe_off]; 5359 5360 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5361 5362 offset = le16_to_cpu(xe->xe_name_offset) + 5363 OCFS2_XATTR_SIZE(xe->xe_name_len); 5364 5365 value_blk = offset / blocksize; 5366 5367 /* We don't allow ocfs2_xattr_value to be stored in different block. 
*/ 5368 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5369 5370 vb.vb_bh = bucket->bu_bhs[value_blk]; 5371 BUG_ON(!vb.vb_bh); 5372 5373 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5374 (vb.vb_bh->b_data + offset % blocksize); 5375 5376 /* 5377 * From here on out we have to dirty the bucket. The generic 5378 * value calls only modify one of the bucket's bhs, but we need 5379 * to send the bucket at once. So if they error, they *could* have 5380 * modified something. We have to assume they did, and dirty 5381 * the whole bucket. This leaves us in a consistent state. 5382 */ 5383 trace_ocfs2_xattr_bucket_value_truncate( 5384 (unsigned long long)bucket_blkno(bucket), xe_off, len); 5385 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5386 if (ret) { 5387 mlog_errno(ret); 5388 goto out; 5389 } 5390 5391 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5392 OCFS2_JOURNAL_ACCESS_WRITE); 5393 if (ret) { 5394 mlog_errno(ret); 5395 goto out; 5396 } 5397 5398 xe->xe_value_size = cpu_to_le64(len); 5399 5400 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5401 5402 out: 5403 return ret; 5404 } 5405 5406 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5407 struct buffer_head *root_bh, 5408 u64 blkno, 5409 u32 cpos, 5410 u32 len, 5411 void *para) 5412 { 5413 int ret; 5414 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5415 struct inode *tl_inode = osb->osb_tl_inode; 5416 handle_t *handle; 5417 struct ocfs2_xattr_block *xb = 5418 (struct ocfs2_xattr_block *)root_bh->b_data; 5419 struct ocfs2_alloc_context *meta_ac = NULL; 5420 struct ocfs2_cached_dealloc_ctxt dealloc; 5421 struct ocfs2_extent_tree et; 5422 5423 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5424 ocfs2_delete_xattr_in_bucket, para); 5425 if (ret) { 5426 mlog_errno(ret); 5427 return ret; 5428 } 5429 5430 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5431 5432 ocfs2_init_dealloc_ctxt(&dealloc); 5433 5434 
trace_ocfs2_rm_xattr_cluster( 5435 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5436 (unsigned long long)blkno, cpos, len); 5437 5438 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5439 len); 5440 5441 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5442 if (ret) { 5443 mlog_errno(ret); 5444 return ret; 5445 } 5446 5447 mutex_lock(&tl_inode->i_mutex); 5448 5449 if (ocfs2_truncate_log_needs_flush(osb)) { 5450 ret = __ocfs2_flush_truncate_log(osb); 5451 if (ret < 0) { 5452 mlog_errno(ret); 5453 goto out; 5454 } 5455 } 5456 5457 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5458 if (IS_ERR(handle)) { 5459 ret = -ENOMEM; 5460 mlog_errno(ret); 5461 goto out; 5462 } 5463 5464 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5465 OCFS2_JOURNAL_ACCESS_WRITE); 5466 if (ret) { 5467 mlog_errno(ret); 5468 goto out_commit; 5469 } 5470 5471 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5472 &dealloc); 5473 if (ret) { 5474 mlog_errno(ret); 5475 goto out_commit; 5476 } 5477 5478 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5479 ocfs2_journal_dirty(handle, root_bh); 5480 5481 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5482 if (ret) 5483 mlog_errno(ret); 5484 ocfs2_update_inode_fsync_trans(handle, inode, 0); 5485 5486 out_commit: 5487 ocfs2_commit_trans(osb, handle); 5488 out: 5489 ocfs2_schedule_truncate_log_flush(osb, 1); 5490 5491 mutex_unlock(&tl_inode->i_mutex); 5492 5493 if (meta_ac) 5494 ocfs2_free_alloc_context(meta_ac); 5495 5496 ocfs2_run_deallocs(osb, &dealloc); 5497 5498 return ret; 5499 } 5500 5501 /* 5502 * check whether the xattr bucket is filled up with the same hash value. 5503 * If we want to insert the xattr with the same hash, return -ENOSPC. 5504 * If we want to insert a xattr with different hash value, go ahead 5505 * and ocfs2_divide_xattr_bucket will handle this. 
5506 */ 5507 static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 5508 struct ocfs2_xattr_bucket *bucket, 5509 const char *name) 5510 { 5511 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5512 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5513 5514 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5515 return 0; 5516 5517 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 5518 xh->xh_entries[0].xe_name_hash) { 5519 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5520 "hash = %u\n", 5521 (unsigned long long)bucket_blkno(bucket), 5522 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5523 return -ENOSPC; 5524 } 5525 5526 return 0; 5527 } 5528 5529 /* 5530 * Try to set the entry in the current bucket. If we fail, the caller 5531 * will handle getting us another bucket. 5532 */ 5533 static int ocfs2_xattr_set_entry_bucket(struct inode *inode, 5534 struct ocfs2_xattr_info *xi, 5535 struct ocfs2_xattr_search *xs, 5536 struct ocfs2_xattr_set_ctxt *ctxt) 5537 { 5538 int ret; 5539 struct ocfs2_xa_loc loc; 5540 5541 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name); 5542 5543 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5544 xs->not_found ? NULL : xs->here); 5545 ret = ocfs2_xa_set(&loc, xi, ctxt); 5546 if (!ret) { 5547 xs->here = loc.xl_entry; 5548 goto out; 5549 } 5550 if (ret != -ENOSPC) { 5551 mlog_errno(ret); 5552 goto out; 5553 } 5554 5555 /* Ok, we need space. Let's try defragmenting the bucket. 
*/ 5556 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5557 xs->bucket); 5558 if (ret) { 5559 mlog_errno(ret); 5560 goto out; 5561 } 5562 5563 ret = ocfs2_xa_set(&loc, xi, ctxt); 5564 if (!ret) { 5565 xs->here = loc.xl_entry; 5566 goto out; 5567 } 5568 if (ret != -ENOSPC) 5569 mlog_errno(ret); 5570 5571 5572 out: 5573 return ret; 5574 } 5575 5576 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5577 struct ocfs2_xattr_info *xi, 5578 struct ocfs2_xattr_search *xs, 5579 struct ocfs2_xattr_set_ctxt *ctxt) 5580 { 5581 int ret; 5582 5583 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name); 5584 5585 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5586 if (!ret) 5587 goto out; 5588 if (ret != -ENOSPC) { 5589 mlog_errno(ret); 5590 goto out; 5591 } 5592 5593 /* Ack, need more space. Let's try to get another bucket! */ 5594 5595 /* 5596 * We do not allow for overlapping ranges between buckets. And 5597 * the maximum number of collisions we will allow for then is 5598 * one bucket's worth, so check it here whether we need to 5599 * add a new bucket for the insert. 5600 */ 5601 ret = ocfs2_check_xattr_bucket_collision(inode, 5602 xs->bucket, 5603 xi->xi_name); 5604 if (ret) { 5605 mlog_errno(ret); 5606 goto out; 5607 } 5608 5609 ret = ocfs2_add_new_xattr_bucket(inode, 5610 xs->xattr_bh, 5611 xs->bucket, 5612 ctxt); 5613 if (ret) { 5614 mlog_errno(ret); 5615 goto out; 5616 } 5617 5618 /* 5619 * ocfs2_add_new_xattr_bucket() will have updated 5620 * xs->bucket if it moved, but it will not have updated 5621 * any of the other search fields. Thus, we drop it and 5622 * re-search. Everything should be cached, so it'll be 5623 * quick. 
5624 */ 5625 ocfs2_xattr_bucket_relse(xs->bucket); 5626 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5627 xi->xi_name_index, 5628 xi->xi_name, xs); 5629 if (ret && ret != -ENODATA) 5630 goto out; 5631 xs->not_found = ret; 5632 5633 /* Ok, we have a new bucket, let's try again */ 5634 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5635 if (ret && (ret != -ENOSPC)) 5636 mlog_errno(ret); 5637 5638 out: 5639 return ret; 5640 } 5641 5642 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5643 struct ocfs2_xattr_bucket *bucket, 5644 void *para) 5645 { 5646 int ret = 0, ref_credits; 5647 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5648 u16 i; 5649 struct ocfs2_xattr_entry *xe; 5650 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5651 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5652 int credits = ocfs2_remove_extent_credits(osb->sb) + 5653 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5654 struct ocfs2_xattr_value_root *xv; 5655 struct ocfs2_rm_xattr_bucket_para *args = 5656 (struct ocfs2_rm_xattr_bucket_para *)para; 5657 5658 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5659 5660 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5661 xe = &xh->xh_entries[i]; 5662 if (ocfs2_xattr_is_local(xe)) 5663 continue; 5664 5665 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5666 i, &xv, NULL); 5667 if (ret) { 5668 mlog_errno(ret); 5669 break; 5670 } 5671 5672 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5673 args->ref_ci, 5674 args->ref_root_bh, 5675 &ctxt.meta_ac, 5676 &ref_credits); 5677 5678 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5679 if (IS_ERR(ctxt.handle)) { 5680 ret = PTR_ERR(ctxt.handle); 5681 mlog_errno(ret); 5682 break; 5683 } 5684 5685 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5686 i, 0, &ctxt); 5687 5688 ocfs2_commit_trans(osb, ctxt.handle); 5689 if (ctxt.meta_ac) { 5690 ocfs2_free_alloc_context(ctxt.meta_ac); 5691 ctxt.meta_ac = NULL; 5692 } 5693 if (ret) { 5694 mlog_errno(ret); 
5695 break; 5696 } 5697 } 5698 5699 if (ctxt.meta_ac) 5700 ocfs2_free_alloc_context(ctxt.meta_ac); 5701 ocfs2_schedule_truncate_log_flush(osb, 1); 5702 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5703 return ret; 5704 } 5705 5706 /* 5707 * Whenever we modify a xattr value root in the bucket(e.g, CoW 5708 * or change the extent record flag), we need to recalculate 5709 * the metaecc for the whole bucket. So it is done here. 5710 * 5711 * Note: 5712 * We have to give the extra credits for the caller. 5713 */ 5714 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, 5715 handle_t *handle, 5716 void *para) 5717 { 5718 int ret; 5719 struct ocfs2_xattr_bucket *bucket = 5720 (struct ocfs2_xattr_bucket *)para; 5721 5722 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 5723 OCFS2_JOURNAL_ACCESS_WRITE); 5724 if (ret) { 5725 mlog_errno(ret); 5726 return ret; 5727 } 5728 5729 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 5730 5731 return 0; 5732 } 5733 5734 /* 5735 * Special action we need if the xattr value is refcounted. 5736 * 5737 * 1. If the xattr is refcounted, lock the tree. 5738 * 2. CoW the xattr if we are setting the new value and the value 5739 * will be stored outside. 5740 * 3. In other case, decrease_refcount will work for us, so just 5741 * lock the refcount tree, calculate the meta and credits is OK. 5742 * 5743 * We have to do CoW before ocfs2_init_xattr_set_ctxt since 5744 * currently CoW is a completed transaction, while this function 5745 * will also lock the allocators and let us deadlock. So we will 5746 * CoW the whole xattr value. 
5747 */ 5748 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5749 struct ocfs2_dinode *di, 5750 struct ocfs2_xattr_info *xi, 5751 struct ocfs2_xattr_search *xis, 5752 struct ocfs2_xattr_search *xbs, 5753 struct ocfs2_refcount_tree **ref_tree, 5754 int *meta_add, 5755 int *credits) 5756 { 5757 int ret = 0; 5758 struct ocfs2_xattr_block *xb; 5759 struct ocfs2_xattr_entry *xe; 5760 char *base; 5761 u32 p_cluster, num_clusters; 5762 unsigned int ext_flags; 5763 int name_offset, name_len; 5764 struct ocfs2_xattr_value_buf vb; 5765 struct ocfs2_xattr_bucket *bucket = NULL; 5766 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5767 struct ocfs2_post_refcount refcount; 5768 struct ocfs2_post_refcount *p = NULL; 5769 struct buffer_head *ref_root_bh = NULL; 5770 5771 if (!xis->not_found) { 5772 xe = xis->here; 5773 name_offset = le16_to_cpu(xe->xe_name_offset); 5774 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5775 base = xis->base; 5776 vb.vb_bh = xis->inode_bh; 5777 vb.vb_access = ocfs2_journal_access_di; 5778 } else { 5779 int i, block_off = 0; 5780 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5781 xe = xbs->here; 5782 name_offset = le16_to_cpu(xe->xe_name_offset); 5783 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5784 i = xbs->here - xbs->header->xh_entries; 5785 5786 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5787 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5788 bucket_xh(xbs->bucket), 5789 i, &block_off, 5790 &name_offset); 5791 if (ret) { 5792 mlog_errno(ret); 5793 goto out; 5794 } 5795 base = bucket_block(xbs->bucket, block_off); 5796 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5797 vb.vb_access = ocfs2_journal_access; 5798 5799 if (ocfs2_meta_ecc(osb)) { 5800 /*create parameters for ocfs2_post_refcount. 
*/ 5801 bucket = xbs->bucket; 5802 refcount.credits = bucket->bu_blocks; 5803 refcount.para = bucket; 5804 refcount.func = 5805 ocfs2_xattr_bucket_post_refcount; 5806 p = &refcount; 5807 } 5808 } else { 5809 base = xbs->base; 5810 vb.vb_bh = xbs->xattr_bh; 5811 vb.vb_access = ocfs2_journal_access_xb; 5812 } 5813 } 5814 5815 if (ocfs2_xattr_is_local(xe)) 5816 goto out; 5817 5818 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5819 (base + name_offset + name_len); 5820 5821 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5822 &num_clusters, &vb.vb_xv->xr_list, 5823 &ext_flags); 5824 if (ret) { 5825 mlog_errno(ret); 5826 goto out; 5827 } 5828 5829 /* 5830 * We just need to check the 1st extent record, since we always 5831 * CoW the whole xattr. So there shouldn't be a xattr with 5832 * some REFCOUNT extent recs after the 1st one. 5833 */ 5834 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5835 goto out; 5836 5837 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5838 1, ref_tree, &ref_root_bh); 5839 if (ret) { 5840 mlog_errno(ret); 5841 goto out; 5842 } 5843 5844 /* 5845 * If we are deleting the xattr or the new size will be stored inside, 5846 * cool, leave it there, the xattr truncate process will remove them 5847 * for us(it still needs the refcount tree lock and the meta, credits). 5848 * And the worse case is that every cluster truncate will split the 5849 * refcount tree, and make the original extent become 3. So we will need 5850 * 2 * cluster more extent recs at most. 
5851 */ 5852 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5853 5854 ret = ocfs2_refcounted_xattr_delete_need(inode, 5855 &(*ref_tree)->rf_ci, 5856 ref_root_bh, vb.vb_xv, 5857 meta_add, credits); 5858 if (ret) 5859 mlog_errno(ret); 5860 goto out; 5861 } 5862 5863 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5864 *ref_tree, ref_root_bh, 0, 5865 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5866 if (ret) 5867 mlog_errno(ret); 5868 5869 out: 5870 brelse(ref_root_bh); 5871 return ret; 5872 } 5873 5874 /* 5875 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5876 * The physical clusters will be added to refcount tree. 5877 */ 5878 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5879 struct ocfs2_xattr_value_root *xv, 5880 struct ocfs2_extent_tree *value_et, 5881 struct ocfs2_caching_info *ref_ci, 5882 struct buffer_head *ref_root_bh, 5883 struct ocfs2_cached_dealloc_ctxt *dealloc, 5884 struct ocfs2_post_refcount *refcount) 5885 { 5886 int ret = 0; 5887 u32 clusters = le32_to_cpu(xv->xr_clusters); 5888 u32 cpos, p_cluster, num_clusters; 5889 struct ocfs2_extent_list *el = &xv->xr_list; 5890 unsigned int ext_flags; 5891 5892 cpos = 0; 5893 while (cpos < clusters) { 5894 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5895 &num_clusters, el, &ext_flags); 5896 if (ret) { 5897 mlog_errno(ret); 5898 break; 5899 } 5900 5901 cpos += num_clusters; 5902 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5903 continue; 5904 5905 BUG_ON(!p_cluster); 5906 5907 ret = ocfs2_add_refcount_flag(inode, value_et, 5908 ref_ci, ref_root_bh, 5909 cpos - num_clusters, 5910 p_cluster, num_clusters, 5911 dealloc, refcount); 5912 if (ret) { 5913 mlog_errno(ret); 5914 break; 5915 } 5916 } 5917 5918 return ret; 5919 } 5920 5921 /* 5922 * Given a normal ocfs2_xattr_header, refcount all the entries which 5923 * have value stored outside. 5924 * Used for xattrs stored in inode and ocfs2_xattr_block. 
5925 */ 5926 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5927 struct ocfs2_xattr_value_buf *vb, 5928 struct ocfs2_xattr_header *header, 5929 struct ocfs2_caching_info *ref_ci, 5930 struct buffer_head *ref_root_bh, 5931 struct ocfs2_cached_dealloc_ctxt *dealloc) 5932 { 5933 5934 struct ocfs2_xattr_entry *xe; 5935 struct ocfs2_xattr_value_root *xv; 5936 struct ocfs2_extent_tree et; 5937 int i, ret = 0; 5938 5939 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5940 xe = &header->xh_entries[i]; 5941 5942 if (ocfs2_xattr_is_local(xe)) 5943 continue; 5944 5945 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5946 le16_to_cpu(xe->xe_name_offset) + 5947 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5948 5949 vb->vb_xv = xv; 5950 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5951 5952 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 5953 ref_ci, ref_root_bh, 5954 dealloc, NULL); 5955 if (ret) { 5956 mlog_errno(ret); 5957 break; 5958 } 5959 } 5960 5961 return ret; 5962 } 5963 5964 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 5965 struct buffer_head *fe_bh, 5966 struct ocfs2_caching_info *ref_ci, 5967 struct buffer_head *ref_root_bh, 5968 struct ocfs2_cached_dealloc_ctxt *dealloc) 5969 { 5970 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 5971 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) 5972 (fe_bh->b_data + inode->i_sb->s_blocksize - 5973 le16_to_cpu(di->i_xattr_inline_size)); 5974 struct ocfs2_xattr_value_buf vb = { 5975 .vb_bh = fe_bh, 5976 .vb_access = ocfs2_journal_access_di, 5977 }; 5978 5979 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 5980 ref_ci, ref_root_bh, dealloc); 5981 } 5982 5983 struct ocfs2_xattr_tree_value_refcount_para { 5984 struct ocfs2_caching_info *ref_ci; 5985 struct buffer_head *ref_root_bh; 5986 struct ocfs2_cached_dealloc_ctxt *dealloc; 5987 }; 5988 5989 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 
5990 struct ocfs2_xattr_bucket *bucket, 5991 int offset, 5992 struct ocfs2_xattr_value_root **xv, 5993 struct buffer_head **bh) 5994 { 5995 int ret, block_off, name_offset; 5996 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5997 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 5998 void *base; 5999 6000 ret = ocfs2_xattr_bucket_get_name_value(sb, 6001 bucket_xh(bucket), 6002 offset, 6003 &block_off, 6004 &name_offset); 6005 if (ret) { 6006 mlog_errno(ret); 6007 goto out; 6008 } 6009 6010 base = bucket_block(bucket, block_off); 6011 6012 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6013 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6014 6015 if (bh) 6016 *bh = bucket->bu_bhs[block_off]; 6017 out: 6018 return ret; 6019 } 6020 6021 /* 6022 * For a given xattr bucket, refcount all the entries which 6023 * have value stored outside. 6024 */ 6025 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6026 struct ocfs2_xattr_bucket *bucket, 6027 void *para) 6028 { 6029 int i, ret = 0; 6030 struct ocfs2_extent_tree et; 6031 struct ocfs2_xattr_tree_value_refcount_para *ref = 6032 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6033 struct ocfs2_xattr_header *xh = 6034 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6035 struct ocfs2_xattr_entry *xe; 6036 struct ocfs2_xattr_value_buf vb = { 6037 .vb_access = ocfs2_journal_access, 6038 }; 6039 struct ocfs2_post_refcount refcount = { 6040 .credits = bucket->bu_blocks, 6041 .para = bucket, 6042 .func = ocfs2_xattr_bucket_post_refcount, 6043 }; 6044 struct ocfs2_post_refcount *p = NULL; 6045 6046 /* We only need post_refcount if we support metaecc. 
*/ 6047 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6048 p = &refcount; 6049 6050 trace_ocfs2_xattr_bucket_value_refcount( 6051 (unsigned long long)bucket_blkno(bucket), 6052 le16_to_cpu(xh->xh_count)); 6053 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6054 xe = &xh->xh_entries[i]; 6055 6056 if (ocfs2_xattr_is_local(xe)) 6057 continue; 6058 6059 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6060 &vb.vb_xv, &vb.vb_bh); 6061 if (ret) { 6062 mlog_errno(ret); 6063 break; 6064 } 6065 6066 ocfs2_init_xattr_value_extent_tree(&et, 6067 INODE_CACHE(inode), &vb); 6068 6069 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6070 &et, ref->ref_ci, 6071 ref->ref_root_bh, 6072 ref->dealloc, p); 6073 if (ret) { 6074 mlog_errno(ret); 6075 break; 6076 } 6077 } 6078 6079 return ret; 6080 6081 } 6082 6083 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6084 struct buffer_head *root_bh, 6085 u64 blkno, u32 cpos, u32 len, void *para) 6086 { 6087 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6088 ocfs2_xattr_bucket_value_refcount, 6089 para); 6090 } 6091 6092 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6093 struct buffer_head *blk_bh, 6094 struct ocfs2_caching_info *ref_ci, 6095 struct buffer_head *ref_root_bh, 6096 struct ocfs2_cached_dealloc_ctxt *dealloc) 6097 { 6098 int ret = 0; 6099 struct ocfs2_xattr_block *xb = 6100 (struct ocfs2_xattr_block *)blk_bh->b_data; 6101 6102 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6103 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6104 struct ocfs2_xattr_value_buf vb = { 6105 .vb_bh = blk_bh, 6106 .vb_access = ocfs2_journal_access_xb, 6107 }; 6108 6109 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6110 ref_ci, ref_root_bh, 6111 dealloc); 6112 } else { 6113 struct ocfs2_xattr_tree_value_refcount_para para = { 6114 .ref_ci = ref_ci, 6115 .ref_root_bh = ref_root_bh, 6116 .dealloc = dealloc, 6117 }; 6118 6119 ret = 
ocfs2_iterate_xattr_index_block(inode, blk_bh, 6120 ocfs2_refcount_xattr_tree_rec, 6121 ¶); 6122 } 6123 6124 return ret; 6125 } 6126 6127 int ocfs2_xattr_attach_refcount_tree(struct inode *inode, 6128 struct buffer_head *fe_bh, 6129 struct ocfs2_caching_info *ref_ci, 6130 struct buffer_head *ref_root_bh, 6131 struct ocfs2_cached_dealloc_ctxt *dealloc) 6132 { 6133 int ret = 0; 6134 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6135 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6136 struct buffer_head *blk_bh = NULL; 6137 6138 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 6139 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, 6140 ref_ci, ref_root_bh, 6141 dealloc); 6142 if (ret) { 6143 mlog_errno(ret); 6144 goto out; 6145 } 6146 } 6147 6148 if (!di->i_xattr_loc) 6149 goto out; 6150 6151 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 6152 &blk_bh); 6153 if (ret < 0) { 6154 mlog_errno(ret); 6155 goto out; 6156 } 6157 6158 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, 6159 ref_root_bh, dealloc); 6160 if (ret) 6161 mlog_errno(ret); 6162 6163 brelse(blk_bh); 6164 out: 6165 6166 return ret; 6167 } 6168 6169 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); 6170 /* 6171 * Store the information we need in xattr reflink. 6172 * old_bh and new_bh are inode bh for the old and new inode. 6173 */ 6174 struct ocfs2_xattr_reflink { 6175 struct inode *old_inode; 6176 struct inode *new_inode; 6177 struct buffer_head *old_bh; 6178 struct buffer_head *new_bh; 6179 struct ocfs2_caching_info *ref_ci; 6180 struct buffer_head *ref_root_bh; 6181 struct ocfs2_cached_dealloc_ctxt *dealloc; 6182 should_xattr_reflinked *xattr_reflinked; 6183 }; 6184 6185 /* 6186 * Given a xattr header and xe offset, 6187 * return the proper xv and the corresponding bh. 6188 * xattr in inode, block and xattr tree have different implementaions. 
6189 */ 6190 typedef int (get_xattr_value_root)(struct super_block *sb, 6191 struct buffer_head *bh, 6192 struct ocfs2_xattr_header *xh, 6193 int offset, 6194 struct ocfs2_xattr_value_root **xv, 6195 struct buffer_head **ret_bh, 6196 void *para); 6197 6198 /* 6199 * Calculate all the xattr value root metadata stored in this xattr header and 6200 * credits we need if we create them from the scratch. 6201 * We use get_xattr_value_root so that all types of xattr container can use it. 6202 */ 6203 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, 6204 struct buffer_head *bh, 6205 struct ocfs2_xattr_header *xh, 6206 int *metas, int *credits, 6207 int *num_recs, 6208 get_xattr_value_root *func, 6209 void *para) 6210 { 6211 int i, ret = 0; 6212 struct ocfs2_xattr_value_root *xv; 6213 struct ocfs2_xattr_entry *xe; 6214 6215 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6216 xe = &xh->xh_entries[i]; 6217 if (ocfs2_xattr_is_local(xe)) 6218 continue; 6219 6220 ret = func(sb, bh, xh, i, &xv, NULL, para); 6221 if (ret) { 6222 mlog_errno(ret); 6223 break; 6224 } 6225 6226 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * 6227 le16_to_cpu(xv->xr_list.l_next_free_rec); 6228 6229 *credits += ocfs2_calc_extend_credits(sb, 6230 &def_xv.xv.xr_list); 6231 6232 /* 6233 * If the value is a tree with depth > 1, We don't go deep 6234 * to the extent block, so just calculate a maximum record num. 6235 */ 6236 if (!xv->xr_list.l_tree_depth) 6237 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); 6238 else 6239 *num_recs += ocfs2_clusters_for_bytes(sb, 6240 XATTR_SIZE_MAX); 6241 } 6242 6243 return ret; 6244 } 6245 6246 /* Used by xattr inode and block to return the right xv and buffer_head. 
 */
static int ocfs2_get_xattr_value_root(struct super_block *sb,
				      struct buffer_head *bh,
				      struct ocfs2_xattr_header *xh,
				      int offset,
				      struct ocfs2_xattr_value_root **xv,
				      struct buffer_head **ret_bh,
				      void *para)
{
	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];

	/* The value root follows the padded name, relative to the header. */
	*xv = (struct ocfs2_xattr_value_root *)((void *)xh +
		le16_to_cpu(xe->xe_name_offset) +
		OCFS2_XATTR_SIZE(xe->xe_name_len));

	/* The value lives in the same buffer the caller passed in. */
	if (ret_bh)
		*ret_bh = bh;

	return 0;
}

/*
 * Lock the meta_ac and calculate how many credits we need for reflink xattrs.
 * It is only used for inline xattr and xattr block.
 *
 * *credits is reset to 0 and then filled in; *meta_ac is only written by
 * ocfs2_reserve_new_metadata_blocks(), i.e. not on the early error exit.
 */
static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
					struct ocfs2_xattr_header *xh,
					struct buffer_head *ref_root_bh,
					int *credits,
					struct ocfs2_alloc_context **meta_ac)
{
	int ret, meta_add = 0, num_recs = 0;
	struct ocfs2_refcount_block *rb =
			(struct ocfs2_refcount_block *)ref_root_bh->b_data;

	*credits = 0;

	ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
						&meta_add, credits, &num_recs,
						ocfs2_get_xattr_value_root,
						NULL);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * We need to add/modify num_recs in refcount tree, so just calculate
	 * an approximate number we need for refcount tree change.
	 * Sometimes we need to split the tree, and after split,  half recs
	 * will be moved to the new block, and a new block can only provide
	 * half number of recs. So we multiply new blocks by 2.
	 *
	 * NOTE(review): the division truncates here, whereas
	 * ocfs2_lock_reflink_xattr_rec_allocators() rounds up for the same
	 * estimate -- confirm the difference is intended.
	 */
	num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
	meta_add += num_recs;
	*credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
	else
		*credits += 1;

	ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
	if (ret)
		mlog_errno(ret);

out:
	return ret;
}

/*
 * Given a xattr header, reflink all the xattrs in this container.
 * It can be used for inode, block and bucket.
 *
 * NOTE:
 * Before we call this function, the caller has memcpy the xattr in
 * old_xh to the new_xh.
 *
 * If args.xattr_reflinked is set, call it to decide whether the xe should
 * be reflinked or not. If not, remove it from the new xattr header.
 *
 * @func returns the value root (and optionally its buffer head) of an
 * entry; @para is passed through to it unchanged.
 */
static int ocfs2_reflink_xattr_header(handle_t *handle,
				      struct ocfs2_xattr_reflink *args,
				      struct buffer_head *old_bh,
				      struct ocfs2_xattr_header *xh,
				      struct buffer_head *new_bh,
				      struct ocfs2_xattr_header *new_xh,
				      struct ocfs2_xattr_value_buf *vb,
				      struct ocfs2_alloc_context *meta_ac,
				      get_xattr_value_root *func,
				      void *para)
{
	int ret = 0, i, j;
	struct super_block *sb = args->old_inode->i_sb;
	struct buffer_head *value_bh;
	struct ocfs2_xattr_entry *xe, *last;
	struct ocfs2_xattr_value_root *xv, *new_xv;
	struct ocfs2_extent_tree data_et;
	u32 clusters, cpos, p_cluster, num_clusters;
	unsigned int ext_flags = 0;

	trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr,
					 le16_to_cpu(xh->xh_count));

	/* i indexes the old header, j the (possibly shrinking) new one. */
	last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
	for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
		xe = &xh->xh_entries[i];

		if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
			xe = &new_xh->xh_entries[j];

			/*
			 * NOTE(review): last was computed from the count
			 * before any removal, so it addresses the slot just
			 * past the original final entry -- confirm the move
			 * length and the slot being cleared are intended.
			 */
			le16_add_cpu(&new_xh->xh_count, -1);
			if (new_xh->xh_count) {
				memmove(xe, xe + 1,
					(void *)last - (void *)xe);
				memset(last, 0,
				       sizeof(struct ocfs2_xattr_entry));
			}

			/*
			 * We don't want j to increase in the next round since
			 * it is already moved ahead.
			 */
			j--;
			continue;
		}

		if (ocfs2_xattr_is_local(xe))
			continue;

		ret = func(sb, old_bh, xh, i, &xv, NULL, para);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/*
		 * For the xattr which has l_tree_depth = 0, all the extent
		 * recs have already been copied to the new xh with the
		 * appropriate OCFS2_EXT_REFCOUNTED flag; we just need to
		 * increase the refcount in the refcount tree.
		 *
		 * For the xattr which has l_tree_depth > 0, we need
		 * to initialize it to the empty default value root,
		 * and then insert the extents one by one.
		 */
		if (xv->xr_list.l_tree_depth) {
			memcpy(new_xv, &def_xv, sizeof(def_xv));
			vb->vb_xv = new_xv;
			vb->vb_bh = value_bh;
			ocfs2_init_xattr_value_extent_tree(&data_et,
					INODE_CACHE(args->new_inode), vb);
		}

		clusters = le32_to_cpu(xv->xr_clusters);
		cpos = 0;
		while (cpos < clusters) {
			ret = ocfs2_xattr_get_clusters(args->old_inode,
						       cpos,
						       &p_cluster,
						       &num_clusters,
						       &xv->xr_list,
						       &ext_flags);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			BUG_ON(!p_cluster);

			/* Tree-rooted values get their extents re-inserted. */
			if (xv->xr_list.l_tree_depth) {
				ret = ocfs2_insert_extent(handle,
						&data_et, cpos,
						ocfs2_clusters_to_blocks(
							args->old_inode->i_sb,
							p_cluster),
						num_clusters, ext_flags,
						meta_ac);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
			}

			ret = ocfs2_increase_refcount(handle, args->ref_ci,
						      args->ref_root_bh,
						      p_cluster, num_clusters,
						      meta_ac, args->dealloc);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			cpos += num_clusters;
		}
	}

out:
	return ret;
}

/*
 * Reflink the xattrs stored inline in the old inode block into the new
 * inode block: copy the inline area, reflink its header inside one
 * transaction and set the xattr feature flags on the new inode.
 */
static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
{
	int ret = 0, credits = 0;
	handle_t *handle;
	struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
	int inline_size = le16_to_cpu(di->i_xattr_inline_size);
	/* The inline xattr area occupies the tail of the inode block. */
	int header_off = osb->sb->s_blocksize - inline_size;
	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
					(args->old_bh->b_data + header_off);
	struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
					(args->new_bh->b_data + header_off);
	struct ocfs2_alloc_context *meta_ac = NULL;
	struct ocfs2_inode_info *new_oi;
	struct ocfs2_dinode *new_di;
	struct ocfs2_xattr_value_buf vb = {
		.vb_bh = args->new_bh,
		.vb_access = ocfs2_journal_access_di,
	};

	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
						  &credits, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
				      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Copy the whole inline area before reflinking its header. */
	memcpy(args->new_bh->b_data + header_off,
	       args->old_bh->b_data + header_off, inline_size);

	new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
	new_di->i_xattr_inline_size = cpu_to_le16(inline_size);

	ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
					 args->new_bh, new_xh, &vb, meta_ac,
					 ocfs2_get_xattr_value_root, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	new_oi = OCFS2_I(args->new_inode);
	/*
	 * Adjust extent record count to reserve space for extended attribute.
	 * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
	 */
	if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
	    !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
		struct ocfs2_extent_list *el = &new_di->id2.i_list;
		le16_add_cpu(&el->l_count, -(inline_size /
					sizeof(struct ocfs2_extent_rec)));
	}
	/* Keep the in-memory and on-disk feature flags in step. */
	spin_lock(&new_oi->ip_lock);
	new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
	new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
	spin_unlock(&new_oi->ip_lock);

	ocfs2_journal_dirty(handle, args->new_bh);

out_commit:
	ocfs2_commit_trans(osb, handle);

out:
	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);
	return ret;
}

/*
 * Reserve one metadata block and create an xattr block for @inode inside
 * a transaction of its own.  @indexed is passed on to
 * ocfs2_create_xattr_block(); the new block is returned through @ret_bh.
 */
static int ocfs2_create_empty_xattr_block(struct inode *inode,
					  struct buffer_head *fe_bh,
					  struct buffer_head **ret_bh,
					  int indexed)
{
	int ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_xattr_set_ctxt ctxt;

	memset(&ctxt, 0, sizeof(ctxt));
	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
	if (IS_ERR(ctxt.handle)) {
		ret = PTR_ERR(ctxt.handle);
		mlog_errno(ret);
		goto out;
	}

	trace_ocfs2_create_empty_xattr_block(
				(unsigned long long)fe_bh->b_blocknr, indexed);
	ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
				       ret_bh);
	if (ret)
		mlog_errno(ret);

	/* The transaction is committed whether or not creation succeeded. */
	ocfs2_commit_trans(osb, ctxt.handle);
out:
	ocfs2_free_alloc_context(ctxt.meta_ac);
	return ret;
}

/*
 * Reflink a non-indexed xattr block: copy everything from the xattr
 * header onwards out of @blk_bh into @new_blk_bh, reflink the header and,
 * if the new inode does not have OCFS2_HAS_XATTR_FL yet, set it.
 */
static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
				     struct buffer_head *blk_bh,
				     struct buffer_head *new_blk_bh)
{
	int ret = 0, credits = 0;
	handle_t *handle;
	struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
	struct ocfs2_dinode *new_di;
	struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
	int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)blk_bh->b_data;
	struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
	struct ocfs2_xattr_block *new_xb =
			(struct ocfs2_xattr_block *)new_blk_bh->b_data;
	struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
	/*
	 * NOTE(review): not initialised here, unlike in
	 * ocfs2_reflink_xattr_inline(); it is only read after
	 * ocfs2_reflink_lock_xattr_allocators() returned 0.
	 */
	struct ocfs2_alloc_context *meta_ac;
	struct ocfs2_xattr_value_buf vb = {
		.vb_bh = new_blk_bh,
		.vb_access = ocfs2_journal_access_xb,
	};

	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
						  &credits, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	/* One more credits in case we need to add xattr flags in new inode. */
	handle = ocfs2_start_trans(osb, credits + 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	/* The inode block is only journalled when its flags will change. */
	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
		ret = ocfs2_journal_access_di(handle,
					      INODE_CACHE(args->new_inode),
					      args->new_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}
	}

	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
				      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Copy from the xattr header to the end of the block. */
	memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
	       osb->sb->s_blocksize - header_off);

	ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
					 new_blk_bh, new_xh, &vb, meta_ac,
					 ocfs2_get_xattr_value_root, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ocfs2_journal_dirty(handle, new_blk_bh);

	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
		new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
		spin_lock(&new_oi->ip_lock);
		new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
		new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
		spin_unlock(&new_oi->ip_lock);

		ocfs2_journal_dirty(handle, args->new_bh);
	}

out_commit:
	ocfs2_commit_trans(osb, handle);

out:
	ocfs2_free_alloc_context(meta_ac);
	return ret;
}

/* State shared by the helpers that reflink an indexed xattr tree. */
struct ocfs2_reflink_xattr_tree_args {
	struct ocfs2_xattr_reflink *reflink;
	struct buffer_head *old_blk_bh;
	struct buffer_head *new_blk_bh;
	struct ocfs2_xattr_bucket *old_bucket;
	struct ocfs2_xattr_bucket *new_bucket;
};

/*
 * NOTE:
 * We have to handle the case that both old bucket and new bucket
 * will call this function to get the right ret_bh.
 * So The caller must give us the right bh.
 *
 * @para is a struct ocfs2_reflink_xattr_tree_args; @bh selects the old
 * bucket when it is that bucket's first buffer head, the new bucket
 * otherwise.
 */
static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
					struct buffer_head *bh,
					struct ocfs2_xattr_header *xh,
					int offset,
					struct ocfs2_xattr_value_root **xv,
					struct buffer_head **ret_bh,
					void *para)
{
	struct ocfs2_reflink_xattr_tree_args *args =
			(struct ocfs2_reflink_xattr_tree_args *)para;
	struct ocfs2_xattr_bucket *bucket;

	if (bh == args->old_bucket->bu_bhs[0])
		bucket = args->old_bucket;
	else
		bucket = args->new_bucket;

	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
					       xv, ret_bh);
}

/* Running totals gathered while iterating the buckets of a xattr tree. */
struct ocfs2_value_tree_metas {
	int num_metas;
	int credits;
	int num_recs;
};

/*
 * get_xattr_value_root implementation for a single bucket; @para is the
 * struct ocfs2_xattr_bucket to look the entry up in.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
					struct buffer_head *bh,
					struct ocfs2_xattr_header *xh,
					int offset,
					struct ocfs2_xattr_value_root **xv,
					struct buffer_head **ret_bh,
					void *para)
{
	struct ocfs2_xattr_bucket *bucket =
				(struct ocfs2_xattr_bucket *)para;

	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
					       xv, ret_bh);
}

/*
 * Bucket iterator callback: add this bucket's contribution to the
 * struct ocfs2_value_tree_metas passed in @para.
 */
static int ocfs2_calc_value_tree_metas(struct inode *inode,
				      struct ocfs2_xattr_bucket *bucket,
				      void *para)
{
	struct ocfs2_value_tree_metas *metas =
			(struct ocfs2_value_tree_metas *)para;
	struct ocfs2_xattr_header *xh =
			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;

	/* Add the credits for this bucket first. */
	metas->credits += bucket->bu_blocks;
	return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
					xh, &metas->num_metas,
					&metas->credits, &metas->num_recs,
					ocfs2_value_tree_metas_in_bucket,
					bucket);
}

/*
 * Given a xattr extent rec starting from blkno and having len clusters,
 * iterate all the buckets calculate how much metadata we need for reflinking
 * all the ocfs2_xattr_value_root and lock the allocators accordingly.
 *
 * On failure *meta_ac is freed and reset to NULL if it is set.
 */
static int ocfs2_lock_reflink_xattr_rec_allocators(
				struct ocfs2_reflink_xattr_tree_args *args,
				struct ocfs2_extent_tree *xt_et,
				u64 blkno, u32 len, int *credits,
				struct ocfs2_alloc_context **meta_ac,
				struct ocfs2_alloc_context **data_ac)
{
	int ret, num_free_extents;
	struct ocfs2_value_tree_metas metas;
	struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
	struct ocfs2_refcount_block *rb;

	memset(&metas, 0, sizeof(metas));

	ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
					  ocfs2_calc_value_tree_metas, &metas);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	*credits = metas.credits;

	/*
	 * Calculate what we need for the refcount tree change.
	 *
	 * We need to add/modify num_recs in refcount tree, so just calculate
	 * an approximate number we need for refcount tree change.
	 * Sometimes we need to split the tree, and after split,  half recs
	 * will be moved to the new block, and a new block can only provide
	 * half number of recs. So we multiply new blocks by 2.
	 * In the end, we have to add credits for modifying the already
	 * existed refcount block.
	 */
	rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
	metas.num_recs =
		(metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
		 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
	metas.num_metas += metas.num_recs;
	*credits += metas.num_recs +
		    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
	else
		*credits += 1;

	/* count in the xattr tree change. */
	num_free_extents = ocfs2_num_free_extents(osb, xt_et);
	if (num_free_extents < 0) {
		ret = num_free_extents;
		mlog_errno(ret);
		goto out;
	}

	if (num_free_extents < len)
		metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);

	*credits += ocfs2_calc_extend_credits(osb->sb,
					      xt_et->et_root_el);

	if (metas.num_metas) {
		ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
							meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	if (len) {
		ret = ocfs2_reserve_clusters(osb, len, data_ac);
		if (ret)
			mlog_errno(ret);
	}
out:
	if (ret) {
		if (*meta_ac) {
			ocfs2_free_alloc_context(*meta_ac);
			*meta_ac = NULL;
		}
	}

	return ret;
}

static int ocfs2_reflink_xattr_bucket(handle_t *handle,
				u64 blkno, u64 new_blkno, u32 clusters,
				u32 *cpos, int num_buckets,
				struct ocfs2_alloc_context *meta_ac,
				struct ocfs2_alloc_context *data_ac,
				struct ocfs2_reflink_xattr_tree_args *args)
{
	int i, j, ret = 0;
	struct super_block *sb = args->reflink->old_inode->i_sb;
	int bpb = args->old_bucket->bu_blocks;
	struct ocfs2_xattr_value_buf vb = {
		.vb_access = ocfs2_journal_access,
	};

	for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
		ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = ocfs2_xattr_bucket_journal_access(handle,
						args->new_bucket,
						OCFS2_JOURNAL_ACCESS_CREATE);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/* Copy the old bucket block by block into the new one. */
		for (j = 0; j < bpb; j++)
			memcpy(bucket_block(args->new_bucket, j),
			       bucket_block(args->old_bucket, j),
			       sb->s_blocksize);

		/*
		 * Record the start cpos so that we can use it to initialize
		 * our xattr tree we also set the xh_num_bucket for the new
		 * bucket.
		 */
		if (i == 0) {
			*cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
					    xh_entries[0].xe_name_hash);
			bucket_xh(args->new_bucket)->xh_num_buckets =
				cpu_to_le16(num_buckets);
		}

		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);

		ret = ocfs2_reflink_xattr_header(handle, args->reflink,
					args->old_bucket->bu_bhs[0],
					bucket_xh(args->old_bucket),
					args->new_bucket->bu_bhs[0],
					bucket_xh(args->new_bucket),
					&vb, meta_ac,
					ocfs2_get_reflink_xattr_value_root,
					args);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/*
		 * Re-access and dirty the bucket to calculate metaecc.
		 * Because we may extend the transaction in reflink_xattr_header
		 * which will let the already accessed block gone.
6893 */ 6894 ret = ocfs2_xattr_bucket_journal_access(handle, 6895 args->new_bucket, 6896 OCFS2_JOURNAL_ACCESS_WRITE); 6897 if (ret) { 6898 mlog_errno(ret); 6899 break; 6900 } 6901 6902 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6903 6904 ocfs2_xattr_bucket_relse(args->old_bucket); 6905 ocfs2_xattr_bucket_relse(args->new_bucket); 6906 } 6907 6908 ocfs2_xattr_bucket_relse(args->old_bucket); 6909 ocfs2_xattr_bucket_relse(args->new_bucket); 6910 return ret; 6911 } 6912 6913 static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6914 struct inode *inode, 6915 struct ocfs2_reflink_xattr_tree_args *args, 6916 struct ocfs2_extent_tree *et, 6917 struct ocfs2_alloc_context *meta_ac, 6918 struct ocfs2_alloc_context *data_ac, 6919 u64 blkno, u32 cpos, u32 len) 6920 { 6921 int ret, first_inserted = 0; 6922 u32 p_cluster, num_clusters, reflink_cpos = 0; 6923 u64 new_blkno; 6924 unsigned int num_buckets, reflink_buckets; 6925 unsigned int bpc = 6926 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 6927 6928 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6929 if (ret) { 6930 mlog_errno(ret); 6931 goto out; 6932 } 6933 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); 6934 ocfs2_xattr_bucket_relse(args->old_bucket); 6935 6936 while (len && num_buckets) { 6937 ret = ocfs2_claim_clusters(handle, data_ac, 6938 1, &p_cluster, &num_clusters); 6939 if (ret) { 6940 mlog_errno(ret); 6941 goto out; 6942 } 6943 6944 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 6945 reflink_buckets = min(num_buckets, bpc * num_clusters); 6946 6947 ret = ocfs2_reflink_xattr_bucket(handle, blkno, 6948 new_blkno, num_clusters, 6949 &reflink_cpos, reflink_buckets, 6950 meta_ac, data_ac, args); 6951 if (ret) { 6952 mlog_errno(ret); 6953 goto out; 6954 } 6955 6956 /* 6957 * For the 1st allocated cluster, we make it use the same cpos 6958 * so that the xattr tree looks the same as the original one 6959 * in the most case. 
6960 */ 6961 if (!first_inserted) { 6962 reflink_cpos = cpos; 6963 first_inserted = 1; 6964 } 6965 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 6966 num_clusters, 0, meta_ac); 6967 if (ret) 6968 mlog_errno(ret); 6969 6970 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno, 6971 num_clusters, reflink_cpos); 6972 6973 len -= num_clusters; 6974 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6975 num_buckets -= reflink_buckets; 6976 } 6977 out: 6978 return ret; 6979 } 6980 6981 /* 6982 * Create the same xattr extent record in the new inode's xattr tree. 6983 */ 6984 static int ocfs2_reflink_xattr_rec(struct inode *inode, 6985 struct buffer_head *root_bh, 6986 u64 blkno, 6987 u32 cpos, 6988 u32 len, 6989 void *para) 6990 { 6991 int ret, credits = 0; 6992 handle_t *handle; 6993 struct ocfs2_reflink_xattr_tree_args *args = 6994 (struct ocfs2_reflink_xattr_tree_args *)para; 6995 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6996 struct ocfs2_alloc_context *meta_ac = NULL; 6997 struct ocfs2_alloc_context *data_ac = NULL; 6998 struct ocfs2_extent_tree et; 6999 7000 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len); 7001 7002 ocfs2_init_xattr_tree_extent_tree(&et, 7003 INODE_CACHE(args->reflink->new_inode), 7004 args->new_blk_bh); 7005 7006 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 7007 len, &credits, 7008 &meta_ac, &data_ac); 7009 if (ret) { 7010 mlog_errno(ret); 7011 goto out; 7012 } 7013 7014 handle = ocfs2_start_trans(osb, credits); 7015 if (IS_ERR(handle)) { 7016 ret = PTR_ERR(handle); 7017 mlog_errno(ret); 7018 goto out; 7019 } 7020 7021 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7022 meta_ac, data_ac, 7023 blkno, cpos, len); 7024 if (ret) 7025 mlog_errno(ret); 7026 7027 ocfs2_commit_trans(osb, handle); 7028 7029 out: 7030 if (meta_ac) 7031 ocfs2_free_alloc_context(meta_ac); 7032 if (data_ac) 7033 ocfs2_free_alloc_context(data_ac); 7034 return ret; 7035 } 7036 7037 /* 
7038 * Create reflinked xattr buckets. 7039 * We will add bucket one by one, and refcount all the xattrs in the bucket 7040 * if they are stored outside. 7041 */ 7042 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7043 struct buffer_head *blk_bh, 7044 struct buffer_head *new_blk_bh) 7045 { 7046 int ret; 7047 struct ocfs2_reflink_xattr_tree_args para; 7048 7049 memset(¶, 0, sizeof(para)); 7050 para.reflink = args; 7051 para.old_blk_bh = blk_bh; 7052 para.new_blk_bh = new_blk_bh; 7053 7054 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7055 if (!para.old_bucket) { 7056 mlog_errno(-ENOMEM); 7057 return -ENOMEM; 7058 } 7059 7060 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7061 if (!para.new_bucket) { 7062 ret = -ENOMEM; 7063 mlog_errno(ret); 7064 goto out; 7065 } 7066 7067 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7068 ocfs2_reflink_xattr_rec, 7069 ¶); 7070 if (ret) 7071 mlog_errno(ret); 7072 7073 out: 7074 ocfs2_xattr_bucket_free(para.old_bucket); 7075 ocfs2_xattr_bucket_free(para.new_bucket); 7076 return ret; 7077 } 7078 7079 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7080 struct buffer_head *blk_bh) 7081 { 7082 int ret, indexed = 0; 7083 struct buffer_head *new_blk_bh = NULL; 7084 struct ocfs2_xattr_block *xb = 7085 (struct ocfs2_xattr_block *)blk_bh->b_data; 7086 7087 7088 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7089 indexed = 1; 7090 7091 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7092 &new_blk_bh, indexed); 7093 if (ret) { 7094 mlog_errno(ret); 7095 goto out; 7096 } 7097 7098 if (!indexed) 7099 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7100 else 7101 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7102 if (ret) 7103 mlog_errno(ret); 7104 7105 out: 7106 brelse(new_blk_bh); 7107 return ret; 7108 } 7109 7110 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7111 { 7112 int type = 
ocfs2_xattr_get_type(xe); 7113 7114 return type != OCFS2_XATTR_INDEX_SECURITY && 7115 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && 7116 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; 7117 } 7118 7119 int ocfs2_reflink_xattrs(struct inode *old_inode, 7120 struct buffer_head *old_bh, 7121 struct inode *new_inode, 7122 struct buffer_head *new_bh, 7123 bool preserve_security) 7124 { 7125 int ret; 7126 struct ocfs2_xattr_reflink args; 7127 struct ocfs2_inode_info *oi = OCFS2_I(old_inode); 7128 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; 7129 struct buffer_head *blk_bh = NULL; 7130 struct ocfs2_cached_dealloc_ctxt dealloc; 7131 struct ocfs2_refcount_tree *ref_tree; 7132 struct buffer_head *ref_root_bh = NULL; 7133 7134 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7135 le64_to_cpu(di->i_refcount_loc), 7136 1, &ref_tree, &ref_root_bh); 7137 if (ret) { 7138 mlog_errno(ret); 7139 goto out; 7140 } 7141 7142 ocfs2_init_dealloc_ctxt(&dealloc); 7143 7144 args.old_inode = old_inode; 7145 args.new_inode = new_inode; 7146 args.old_bh = old_bh; 7147 args.new_bh = new_bh; 7148 args.ref_ci = &ref_tree->rf_ci; 7149 args.ref_root_bh = ref_root_bh; 7150 args.dealloc = &dealloc; 7151 if (preserve_security) 7152 args.xattr_reflinked = NULL; 7153 else 7154 args.xattr_reflinked = ocfs2_reflink_xattr_no_security; 7155 7156 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 7157 ret = ocfs2_reflink_xattr_inline(&args); 7158 if (ret) { 7159 mlog_errno(ret); 7160 goto out_unlock; 7161 } 7162 } 7163 7164 if (!di->i_xattr_loc) 7165 goto out_unlock; 7166 7167 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), 7168 &blk_bh); 7169 if (ret < 0) { 7170 mlog_errno(ret); 7171 goto out_unlock; 7172 } 7173 7174 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); 7175 if (ret) 7176 mlog_errno(ret); 7177 7178 brelse(blk_bh); 7179 7180 out_unlock: 7181 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7182 ref_tree, 1); 7183 brelse(ref_root_bh); 7184 7185 
if (ocfs2_dealloc_has_cluster(&dealloc)) { 7186 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); 7187 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); 7188 } 7189 7190 out: 7191 return ret; 7192 } 7193 7194 /* 7195 * Initialize security and acl for a already created inode. 7196 * Used for reflink a non-preserve-security file. 7197 * 7198 * It uses common api like ocfs2_xattr_set, so the caller 7199 * must not hold any lock expect i_mutex. 7200 */ 7201 int ocfs2_init_security_and_acl(struct inode *dir, 7202 struct inode *inode, 7203 const struct qstr *qstr, 7204 struct posix_acl *default_acl, 7205 struct posix_acl *acl) 7206 { 7207 struct buffer_head *dir_bh = NULL; 7208 int ret = 0; 7209 7210 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7211 if (ret) { 7212 mlog_errno(ret); 7213 goto leave; 7214 } 7215 7216 ret = ocfs2_inode_lock(dir, &dir_bh, 0); 7217 if (ret) { 7218 mlog_errno(ret); 7219 goto leave; 7220 } 7221 7222 if (!ret && default_acl) 7223 ret = ocfs2_iop_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); 7224 if (!ret && acl) 7225 ret = ocfs2_iop_set_acl(inode, acl, ACL_TYPE_ACCESS); 7226 7227 ocfs2_inode_unlock(dir, 0); 7228 brelse(dir_bh); 7229 leave: 7230 return ret; 7231 } 7232 /* 7233 * 'security' attributes support 7234 */ 7235 static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list, 7236 size_t list_size, const char *name, 7237 size_t name_len, int type) 7238 { 7239 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 7240 const size_t total_len = prefix_len + name_len + 1; 7241 7242 if (list && total_len <= list_size) { 7243 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); 7244 memcpy(list + prefix_len, name, name_len); 7245 list[prefix_len + name_len] = '\0'; 7246 } 7247 return total_len; 7248 } 7249 7250 static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name, 7251 void *buffer, size_t size, int type) 7252 { 7253 if (strcmp(name, "") == 0) 7254 return -EINVAL; 7255 return 
ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY, 7256 name, buffer, size); 7257 } 7258 7259 static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name, 7260 const void *value, size_t size, int flags, int type) 7261 { 7262 if (strcmp(name, "") == 0) 7263 return -EINVAL; 7264 7265 return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY, 7266 name, value, size, flags); 7267 } 7268 7269 static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 7270 void *fs_info) 7271 { 7272 const struct xattr *xattr; 7273 int err = 0; 7274 7275 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 7276 err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7277 xattr->name, xattr->value, 7278 xattr->value_len, XATTR_CREATE); 7279 if (err) 7280 break; 7281 } 7282 return err; 7283 } 7284 7285 int ocfs2_init_security_get(struct inode *inode, 7286 struct inode *dir, 7287 const struct qstr *qstr, 7288 struct ocfs2_security_xattr_info *si) 7289 { 7290 /* check whether ocfs2 support feature xattr */ 7291 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7292 return -EOPNOTSUPP; 7293 if (si) 7294 return security_old_inode_init_security(inode, dir, qstr, 7295 &si->name, &si->value, 7296 &si->value_len); 7297 7298 return security_inode_init_security(inode, dir, qstr, 7299 &ocfs2_initxattrs, NULL); 7300 } 7301 7302 int ocfs2_init_security_set(handle_t *handle, 7303 struct inode *inode, 7304 struct buffer_head *di_bh, 7305 struct ocfs2_security_xattr_info *si, 7306 struct ocfs2_alloc_context *xattr_ac, 7307 struct ocfs2_alloc_context *data_ac) 7308 { 7309 return ocfs2_xattr_set_handle(handle, inode, di_bh, 7310 OCFS2_XATTR_INDEX_SECURITY, 7311 si->name, si->value, si->value_len, 0, 7312 xattr_ac, data_ac); 7313 } 7314 7315 const struct xattr_handler ocfs2_xattr_security_handler = { 7316 .prefix = XATTR_SECURITY_PREFIX, 7317 .list = ocfs2_xattr_security_list, 7318 .get = ocfs2_xattr_security_get, 7319 .set = 
ocfs2_xattr_security_set, 7320 }; 7321 7322 /* 7323 * 'trusted' attributes support 7324 */ 7325 static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list, 7326 size_t list_size, const char *name, 7327 size_t name_len, int type) 7328 { 7329 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 7330 const size_t total_len = prefix_len + name_len + 1; 7331 7332 if (!capable(CAP_SYS_ADMIN)) 7333 return 0; 7334 7335 if (list && total_len <= list_size) { 7336 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); 7337 memcpy(list + prefix_len, name, name_len); 7338 list[prefix_len + name_len] = '\0'; 7339 } 7340 return total_len; 7341 } 7342 7343 static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name, 7344 void *buffer, size_t size, int type) 7345 { 7346 if (strcmp(name, "") == 0) 7347 return -EINVAL; 7348 return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED, 7349 name, buffer, size); 7350 } 7351 7352 static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name, 7353 const void *value, size_t size, int flags, int type) 7354 { 7355 if (strcmp(name, "") == 0) 7356 return -EINVAL; 7357 7358 return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED, 7359 name, value, size, flags); 7360 } 7361 7362 const struct xattr_handler ocfs2_xattr_trusted_handler = { 7363 .prefix = XATTR_TRUSTED_PREFIX, 7364 .list = ocfs2_xattr_trusted_list, 7365 .get = ocfs2_xattr_trusted_get, 7366 .set = ocfs2_xattr_trusted_set, 7367 }; 7368 7369 /* 7370 * 'user' attributes support 7371 */ 7372 static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list, 7373 size_t list_size, const char *name, 7374 size_t name_len, int type) 7375 { 7376 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 7377 const size_t total_len = prefix_len + name_len + 1; 7378 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7379 7380 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7381 return 0; 7382 7383 if (list && total_len <= list_size) { 7384 memcpy(list, 
XATTR_USER_PREFIX, prefix_len); 7385 memcpy(list + prefix_len, name, name_len); 7386 list[prefix_len + name_len] = '\0'; 7387 } 7388 return total_len; 7389 } 7390 7391 static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name, 7392 void *buffer, size_t size, int type) 7393 { 7394 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7395 7396 if (strcmp(name, "") == 0) 7397 return -EINVAL; 7398 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7399 return -EOPNOTSUPP; 7400 return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_USER, name, 7401 buffer, size); 7402 } 7403 7404 static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name, 7405 const void *value, size_t size, int flags, int type) 7406 { 7407 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7408 7409 if (strcmp(name, "") == 0) 7410 return -EINVAL; 7411 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7412 return -EOPNOTSUPP; 7413 7414 return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_USER, 7415 name, value, size, flags); 7416 } 7417 7418 const struct xattr_handler ocfs2_xattr_user_handler = { 7419 .prefix = XATTR_USER_PREFIX, 7420 .list = ocfs2_xattr_user_list, 7421 .get = ocfs2_xattr_user_get, 7422 .set = ocfs2_xattr_user_set, 7423 }; 7424