1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * xattr.c 5 * 6 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 7 * 8 * CREDITS: 9 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public 14 * License version 2 as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 */ 21 22 #include <linux/capability.h> 23 #include <linux/fs.h> 24 #include <linux/types.h> 25 #include <linux/slab.h> 26 #include <linux/highmem.h> 27 #include <linux/pagemap.h> 28 #include <linux/uio.h> 29 #include <linux/sched.h> 30 #include <linux/splice.h> 31 #include <linux/mount.h> 32 #include <linux/writeback.h> 33 #include <linux/falloc.h> 34 #include <linux/sort.h> 35 #include <linux/init.h> 36 #include <linux/module.h> 37 #include <linux/string.h> 38 #include <linux/security.h> 39 40 #include <cluster/masklog.h> 41 42 #include "ocfs2.h" 43 #include "alloc.h" 44 #include "blockcheck.h" 45 #include "dlmglue.h" 46 #include "file.h" 47 #include "symlink.h" 48 #include "sysfile.h" 49 #include "inode.h" 50 #include "journal.h" 51 #include "ocfs2_fs.h" 52 #include "suballoc.h" 53 #include "uptodate.h" 54 #include "buffer_head_io.h" 55 #include "super.h" 56 #include "xattr.h" 57 #include "refcounttree.h" 58 #include "acl.h" 59 #include "ocfs2_trace.h" 60 61 struct ocfs2_xattr_def_value_root { 62 struct ocfs2_xattr_value_root xv; 63 struct ocfs2_extent_rec er; 64 }; 65 66 struct ocfs2_xattr_bucket { 67 /* The inode these xattrs are associated with */ 68 struct inode *bu_inode; 69 70 /* The actual buffers that 
make up the bucket */ 71 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 72 73 /* How many blocks make up one bucket for this filesystem */ 74 int bu_blocks; 75 }; 76 77 struct ocfs2_xattr_set_ctxt { 78 handle_t *handle; 79 struct ocfs2_alloc_context *meta_ac; 80 struct ocfs2_alloc_context *data_ac; 81 struct ocfs2_cached_dealloc_ctxt dealloc; 82 int set_abort; 83 }; 84 85 #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 86 #define OCFS2_XATTR_INLINE_SIZE 80 87 #define OCFS2_XATTR_HEADER_GAP 4 88 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 89 - sizeof(struct ocfs2_xattr_header) \ 90 - OCFS2_XATTR_HEADER_GAP) 91 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 92 - sizeof(struct ocfs2_xattr_block) \ 93 - sizeof(struct ocfs2_xattr_header) \ 94 - OCFS2_XATTR_HEADER_GAP) 95 96 static struct ocfs2_xattr_def_value_root def_xv = { 97 .xv.xr_list.l_count = cpu_to_le16(1), 98 }; 99 100 const struct xattr_handler *ocfs2_xattr_handlers[] = { 101 &ocfs2_xattr_user_handler, 102 &posix_acl_access_xattr_handler, 103 &posix_acl_default_xattr_handler, 104 &ocfs2_xattr_trusted_handler, 105 &ocfs2_xattr_security_handler, 106 NULL 107 }; 108 109 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 110 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 111 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 112 = &posix_acl_access_xattr_handler, 113 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 114 = &posix_acl_default_xattr_handler, 115 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 116 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 117 }; 118 119 struct ocfs2_xattr_info { 120 int xi_name_index; 121 const char *xi_name; 122 int xi_name_len; 123 const void *xi_value; 124 size_t xi_value_len; 125 }; 126 127 struct ocfs2_xattr_search { 128 struct buffer_head *inode_bh; 129 /* 130 * xattr_bh point to the block buffer head which has extended attribute 131 * when extended 
attribute in inode, xattr_bh is equal to inode_bh. 132 */ 133 struct buffer_head *xattr_bh; 134 struct ocfs2_xattr_header *header; 135 struct ocfs2_xattr_bucket *bucket; 136 void *base; 137 void *end; 138 struct ocfs2_xattr_entry *here; 139 int not_found; 140 }; 141 142 /* Operations on struct ocfs2_xa_entry */ 143 struct ocfs2_xa_loc; 144 struct ocfs2_xa_loc_operations { 145 /* 146 * Journal functions 147 */ 148 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 149 int type); 150 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 151 152 /* 153 * Return a pointer to the appropriate buffer in loc->xl_storage 154 * at the given offset from loc->xl_header. 155 */ 156 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 157 158 /* Can we reuse the existing entry for the new value? */ 159 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 160 struct ocfs2_xattr_info *xi); 161 162 /* How much space is needed for the new value? */ 163 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 164 struct ocfs2_xattr_info *xi); 165 166 /* 167 * Return the offset of the first name+value pair. This is 168 * the start of our downward-filling free space. 169 */ 170 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 171 172 /* 173 * Remove the name+value at this location. Do whatever is 174 * appropriate with the remaining name+value pairs. 175 */ 176 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 177 178 /* Fill xl_entry with a new entry */ 179 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 180 181 /* Add name+value storage to an entry */ 182 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 183 184 /* 185 * Initialize the value buf's access and bh fields for this entry. 186 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 187 */ 188 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 189 struct ocfs2_xattr_value_buf *vb); 190 }; 191 192 /* 193 * Describes an xattr entry location. 
This is a memory structure 194 * tracking the on-disk structure. 195 */ 196 struct ocfs2_xa_loc { 197 /* This xattr belongs to this inode */ 198 struct inode *xl_inode; 199 200 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 201 struct ocfs2_xattr_header *xl_header; 202 203 /* Bytes from xl_header to the end of the storage */ 204 int xl_size; 205 206 /* 207 * The ocfs2_xattr_entry this location describes. If this is 208 * NULL, this location describes the on-disk structure where it 209 * would have been. 210 */ 211 struct ocfs2_xattr_entry *xl_entry; 212 213 /* 214 * Internal housekeeping 215 */ 216 217 /* Buffer(s) containing this entry */ 218 void *xl_storage; 219 220 /* Operations on the storage backing this location */ 221 const struct ocfs2_xa_loc_operations *xl_ops; 222 }; 223 224 /* 225 * Convenience functions to calculate how much space is needed for a 226 * given name+value pair 227 */ 228 static int namevalue_size(int name_len, uint64_t value_len) 229 { 230 if (value_len > OCFS2_XATTR_INLINE_SIZE) 231 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 232 else 233 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 234 } 235 236 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 237 { 238 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 239 } 240 241 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 242 { 243 u64 value_len = le64_to_cpu(xe->xe_value_size); 244 245 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 246 ocfs2_xattr_is_local(xe)); 247 return namevalue_size(xe->xe_name_len, value_len); 248 } 249 250 251 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 252 struct ocfs2_xattr_header *xh, 253 int index, 254 int *block_off, 255 int *new_offset); 256 257 static int ocfs2_xattr_block_find(struct inode *inode, 258 int name_index, 259 const char *name, 260 struct ocfs2_xattr_search *xs); 261 static int ocfs2_xattr_index_block_find(struct inode *inode, 262 struct 
buffer_head *root_bh, 263 int name_index, 264 const char *name, 265 struct ocfs2_xattr_search *xs); 266 267 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 268 struct buffer_head *blk_bh, 269 char *buffer, 270 size_t buffer_size); 271 272 static int ocfs2_xattr_create_index_block(struct inode *inode, 273 struct ocfs2_xattr_search *xs, 274 struct ocfs2_xattr_set_ctxt *ctxt); 275 276 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 277 struct ocfs2_xattr_info *xi, 278 struct ocfs2_xattr_search *xs, 279 struct ocfs2_xattr_set_ctxt *ctxt); 280 281 typedef int (xattr_tree_rec_func)(struct inode *inode, 282 struct buffer_head *root_bh, 283 u64 blkno, u32 cpos, u32 len, void *para); 284 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 285 struct buffer_head *root_bh, 286 xattr_tree_rec_func *rec_func, 287 void *para); 288 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 289 struct ocfs2_xattr_bucket *bucket, 290 void *para); 291 static int ocfs2_rm_xattr_cluster(struct inode *inode, 292 struct buffer_head *root_bh, 293 u64 blkno, 294 u32 cpos, 295 u32 len, 296 void *para); 297 298 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 299 u64 src_blk, u64 last_blk, u64 to_blk, 300 unsigned int start_bucket, 301 u32 *first_hash); 302 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 303 struct ocfs2_dinode *di, 304 struct ocfs2_xattr_info *xi, 305 struct ocfs2_xattr_search *xis, 306 struct ocfs2_xattr_search *xbs, 307 struct ocfs2_refcount_tree **ref_tree, 308 int *meta_need, 309 int *credits); 310 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 311 struct ocfs2_xattr_bucket *bucket, 312 int offset, 313 struct ocfs2_xattr_value_root **xv, 314 struct buffer_head **bh); 315 316 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 317 { 318 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 319 } 320 321 static inline u16 
ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 322 { 323 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 324 } 325 326 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 327 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 328 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 329 330 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 331 { 332 struct ocfs2_xattr_bucket *bucket; 333 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 334 335 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 336 337 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); 338 if (bucket) { 339 bucket->bu_inode = inode; 340 bucket->bu_blocks = blks; 341 } 342 343 return bucket; 344 } 345 346 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 347 { 348 int i; 349 350 for (i = 0; i < bucket->bu_blocks; i++) { 351 brelse(bucket->bu_bhs[i]); 352 bucket->bu_bhs[i] = NULL; 353 } 354 } 355 356 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 357 { 358 if (bucket) { 359 ocfs2_xattr_bucket_relse(bucket); 360 bucket->bu_inode = NULL; 361 kfree(bucket); 362 } 363 } 364 365 /* 366 * A bucket that has never been written to disk doesn't need to be 367 * read. We just need the buffer_heads. Don't call this for 368 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 369 * them fully. 
370 */ 371 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 372 u64 xb_blkno, int new) 373 { 374 int i, rc = 0; 375 376 for (i = 0; i < bucket->bu_blocks; i++) { 377 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 378 xb_blkno + i); 379 if (!bucket->bu_bhs[i]) { 380 rc = -ENOMEM; 381 mlog_errno(rc); 382 break; 383 } 384 385 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 386 bucket->bu_bhs[i])) { 387 if (new) 388 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 389 bucket->bu_bhs[i]); 390 else { 391 set_buffer_uptodate(bucket->bu_bhs[i]); 392 ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 393 bucket->bu_bhs[i]); 394 } 395 } 396 } 397 398 if (rc) 399 ocfs2_xattr_bucket_relse(bucket); 400 return rc; 401 } 402 403 /* Read the xattr bucket at xb_blkno */ 404 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 405 u64 xb_blkno) 406 { 407 int rc; 408 409 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 410 bucket->bu_blocks, bucket->bu_bhs, 0, 411 NULL); 412 if (!rc) { 413 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 414 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 415 bucket->bu_bhs, 416 bucket->bu_blocks, 417 &bucket_xh(bucket)->xh_check); 418 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 419 if (rc) 420 mlog_errno(rc); 421 } 422 423 if (rc) 424 ocfs2_xattr_bucket_relse(bucket); 425 return rc; 426 } 427 428 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 429 struct ocfs2_xattr_bucket *bucket, 430 int type) 431 { 432 int i, rc = 0; 433 434 for (i = 0; i < bucket->bu_blocks; i++) { 435 rc = ocfs2_journal_access(handle, 436 INODE_CACHE(bucket->bu_inode), 437 bucket->bu_bhs[i], type); 438 if (rc) { 439 mlog_errno(rc); 440 break; 441 } 442 } 443 444 return rc; 445 } 446 447 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 448 struct ocfs2_xattr_bucket *bucket) 449 { 450 int i; 451 452 
spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 453 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 454 bucket->bu_bhs, bucket->bu_blocks, 455 &bucket_xh(bucket)->xh_check); 456 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 457 458 for (i = 0; i < bucket->bu_blocks; i++) 459 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 460 } 461 462 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 463 struct ocfs2_xattr_bucket *src) 464 { 465 int i; 466 int blocksize = src->bu_inode->i_sb->s_blocksize; 467 468 BUG_ON(dest->bu_blocks != src->bu_blocks); 469 BUG_ON(dest->bu_inode != src->bu_inode); 470 471 for (i = 0; i < src->bu_blocks; i++) { 472 memcpy(bucket_block(dest, i), bucket_block(src, i), 473 blocksize); 474 } 475 } 476 477 static int ocfs2_validate_xattr_block(struct super_block *sb, 478 struct buffer_head *bh) 479 { 480 int rc; 481 struct ocfs2_xattr_block *xb = 482 (struct ocfs2_xattr_block *)bh->b_data; 483 484 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); 485 486 BUG_ON(!buffer_uptodate(bh)); 487 488 /* 489 * If the ecc fails, we return the error but otherwise 490 * leave the filesystem running. We know any error is 491 * local to this block. 
492 */ 493 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 494 if (rc) 495 return rc; 496 497 /* 498 * Errors after here are fatal 499 */ 500 501 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 502 return ocfs2_error(sb, 503 "Extended attribute block #%llu has bad signature %.*s\n", 504 (unsigned long long)bh->b_blocknr, 7, 505 xb->xb_signature); 506 } 507 508 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 509 return ocfs2_error(sb, 510 "Extended attribute block #%llu has an invalid xb_blkno of %llu\n", 511 (unsigned long long)bh->b_blocknr, 512 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 513 } 514 515 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 516 return ocfs2_error(sb, 517 "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n", 518 (unsigned long long)bh->b_blocknr, 519 le32_to_cpu(xb->xb_fs_generation)); 520 } 521 522 return 0; 523 } 524 525 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 526 struct buffer_head **bh) 527 { 528 int rc; 529 struct buffer_head *tmp = *bh; 530 531 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 532 ocfs2_validate_xattr_block); 533 534 /* If ocfs2_read_block() got us a new bh, pass it up. */ 535 if (!rc && !*bh) 536 *bh = tmp; 537 538 return rc; 539 } 540 541 static inline const char *ocfs2_xattr_prefix(int name_index) 542 { 543 const struct xattr_handler *handler = NULL; 544 545 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 546 handler = ocfs2_xattr_handler_map[name_index]; 547 return handler ? 
xattr_prefix(handler) : NULL; 548 } 549 550 static u32 ocfs2_xattr_name_hash(struct inode *inode, 551 const char *name, 552 int name_len) 553 { 554 /* Get hash value of uuid from super block */ 555 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 556 int i; 557 558 /* hash extended attribute name */ 559 for (i = 0; i < name_len; i++) { 560 hash = (hash << OCFS2_HASH_SHIFT) ^ 561 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 562 *name++; 563 } 564 565 return hash; 566 } 567 568 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 569 { 570 return namevalue_size(name_len, value_len) + 571 sizeof(struct ocfs2_xattr_entry); 572 } 573 574 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 575 { 576 return namevalue_size_xi(xi) + 577 sizeof(struct ocfs2_xattr_entry); 578 } 579 580 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 581 { 582 return namevalue_size_xe(xe) + 583 sizeof(struct ocfs2_xattr_entry); 584 } 585 586 int ocfs2_calc_security_init(struct inode *dir, 587 struct ocfs2_security_xattr_info *si, 588 int *want_clusters, 589 int *xattr_credits, 590 struct ocfs2_alloc_context **xattr_ac) 591 { 592 int ret = 0; 593 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 594 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 595 si->value_len); 596 597 /* 598 * The max space of security xattr taken inline is 599 * 256(name) + 80(value) + 16(entry) = 352 bytes, 600 * So reserve one metadata block for it is ok. 
601 */ 602 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 603 s_size > OCFS2_XATTR_FREE_IN_IBODY) { 604 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 605 if (ret) { 606 mlog_errno(ret); 607 return ret; 608 } 609 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 610 } 611 612 /* reserve clusters for xattr value which will be set in B tree*/ 613 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { 614 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 615 si->value_len); 616 617 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 618 new_clusters); 619 *want_clusters += new_clusters; 620 } 621 return ret; 622 } 623 624 int ocfs2_calc_xattr_init(struct inode *dir, 625 struct buffer_head *dir_bh, 626 umode_t mode, 627 struct ocfs2_security_xattr_info *si, 628 int *want_clusters, 629 int *xattr_credits, 630 int *want_meta) 631 { 632 int ret = 0; 633 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 634 int s_size = 0, a_size = 0, acl_len = 0, new_clusters; 635 636 if (si->enable) 637 s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 638 si->value_len); 639 640 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 641 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 642 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 643 "", NULL, 0); 644 if (acl_len > 0) { 645 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 646 if (S_ISDIR(mode)) 647 a_size <<= 1; 648 } else if (acl_len != 0 && acl_len != -ENODATA) { 649 mlog_errno(ret); 650 return ret; 651 } 652 } 653 654 if (!(s_size + a_size)) 655 return ret; 656 657 /* 658 * The max space of security xattr taken inline is 659 * 256(name) + 80(value) + 16(entry) = 352 bytes, 660 * The max space of acl xattr taken inline is 661 * 80(value) + 16(entry) * 2(if directory) = 192 bytes, 662 * when blocksize = 512, may reserve one more cluser for 663 * xattr bucket, otherwise reserve one metadata block 664 * for them is ok. 
665 * If this is a new directory with inline data, 666 * we choose to reserve the entire inline area for 667 * directory contents and force an external xattr block. 668 */ 669 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 670 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || 671 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 672 *want_meta = *want_meta + 1; 673 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 674 } 675 676 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && 677 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { 678 *want_clusters += 1; 679 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); 680 } 681 682 /* 683 * reserve credits and clusters for xattrs which has large value 684 * and have to be set outside 685 */ 686 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { 687 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 688 si->value_len); 689 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 690 new_clusters); 691 *want_clusters += new_clusters; 692 } 693 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && 694 acl_len > OCFS2_XATTR_INLINE_SIZE) { 695 /* for directory, it has DEFAULT and ACCESS two types of acls */ 696 new_clusters = (S_ISDIR(mode) ? 
2 : 1) * 697 ocfs2_clusters_for_bytes(dir->i_sb, acl_len); 698 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 699 new_clusters); 700 *want_clusters += new_clusters; 701 } 702 703 return ret; 704 } 705 706 static int ocfs2_xattr_extend_allocation(struct inode *inode, 707 u32 clusters_to_add, 708 struct ocfs2_xattr_value_buf *vb, 709 struct ocfs2_xattr_set_ctxt *ctxt) 710 { 711 int status = 0, credits; 712 handle_t *handle = ctxt->handle; 713 enum ocfs2_alloc_restarted why; 714 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 715 struct ocfs2_extent_tree et; 716 717 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 718 719 while (clusters_to_add) { 720 trace_ocfs2_xattr_extend_allocation(clusters_to_add); 721 722 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 723 OCFS2_JOURNAL_ACCESS_WRITE); 724 if (status < 0) { 725 mlog_errno(status); 726 break; 727 } 728 729 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 730 status = ocfs2_add_clusters_in_btree(handle, 731 &et, 732 &logical_start, 733 clusters_to_add, 734 0, 735 ctxt->data_ac, 736 ctxt->meta_ac, 737 &why); 738 if ((status < 0) && (status != -EAGAIN)) { 739 if (status != -ENOSPC) 740 mlog_errno(status); 741 break; 742 } 743 744 ocfs2_journal_dirty(handle, vb->vb_bh); 745 746 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - 747 prev_clusters; 748 749 if (why != RESTART_NONE && clusters_to_add) { 750 /* 751 * We can only fail in case the alloc file doesn't give 752 * up enough clusters. 
753 */ 754 BUG_ON(why == RESTART_META); 755 756 credits = ocfs2_calc_extend_credits(inode->i_sb, 757 &vb->vb_xv->xr_list); 758 status = ocfs2_extend_trans(handle, credits); 759 if (status < 0) { 760 status = -ENOMEM; 761 mlog_errno(status); 762 break; 763 } 764 } 765 } 766 767 return status; 768 } 769 770 static int __ocfs2_remove_xattr_range(struct inode *inode, 771 struct ocfs2_xattr_value_buf *vb, 772 u32 cpos, u32 phys_cpos, u32 len, 773 unsigned int ext_flags, 774 struct ocfs2_xattr_set_ctxt *ctxt) 775 { 776 int ret; 777 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 778 handle_t *handle = ctxt->handle; 779 struct ocfs2_extent_tree et; 780 781 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 782 783 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 784 OCFS2_JOURNAL_ACCESS_WRITE); 785 if (ret) { 786 mlog_errno(ret); 787 goto out; 788 } 789 790 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, 791 &ctxt->dealloc); 792 if (ret) { 793 mlog_errno(ret); 794 goto out; 795 } 796 797 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 798 ocfs2_journal_dirty(handle, vb->vb_bh); 799 800 if (ext_flags & OCFS2_EXT_REFCOUNTED) 801 ret = ocfs2_decrease_refcount(inode, handle, 802 ocfs2_blocks_to_clusters(inode->i_sb, 803 phys_blkno), 804 len, ctxt->meta_ac, &ctxt->dealloc, 1); 805 else 806 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, 807 phys_blkno, len); 808 if (ret) 809 mlog_errno(ret); 810 811 out: 812 return ret; 813 } 814 815 static int ocfs2_xattr_shrink_size(struct inode *inode, 816 u32 old_clusters, 817 u32 new_clusters, 818 struct ocfs2_xattr_value_buf *vb, 819 struct ocfs2_xattr_set_ctxt *ctxt) 820 { 821 int ret = 0; 822 unsigned int ext_flags; 823 u32 trunc_len, cpos, phys_cpos, alloc_size; 824 u64 block; 825 826 if (old_clusters <= new_clusters) 827 return 0; 828 829 cpos = new_clusters; 830 trunc_len = old_clusters - new_clusters; 831 while (trunc_len) { 832 ret = ocfs2_xattr_get_clusters(inode, cpos, 
&phys_cpos, 833 &alloc_size, 834 &vb->vb_xv->xr_list, &ext_flags); 835 if (ret) { 836 mlog_errno(ret); 837 goto out; 838 } 839 840 if (alloc_size > trunc_len) 841 alloc_size = trunc_len; 842 843 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 844 phys_cpos, alloc_size, 845 ext_flags, ctxt); 846 if (ret) { 847 mlog_errno(ret); 848 goto out; 849 } 850 851 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 852 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 853 block, alloc_size); 854 cpos += alloc_size; 855 trunc_len -= alloc_size; 856 } 857 858 out: 859 return ret; 860 } 861 862 static int ocfs2_xattr_value_truncate(struct inode *inode, 863 struct ocfs2_xattr_value_buf *vb, 864 int len, 865 struct ocfs2_xattr_set_ctxt *ctxt) 866 { 867 int ret; 868 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 869 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 870 871 if (new_clusters == old_clusters) 872 return 0; 873 874 if (new_clusters > old_clusters) 875 ret = ocfs2_xattr_extend_allocation(inode, 876 new_clusters - old_clusters, 877 vb, ctxt); 878 else 879 ret = ocfs2_xattr_shrink_size(inode, 880 old_clusters, new_clusters, 881 vb, ctxt); 882 883 return ret; 884 } 885 886 static int ocfs2_xattr_list_entry(struct super_block *sb, 887 char *buffer, size_t size, 888 size_t *result, int type, 889 const char *name, int name_len) 890 { 891 char *p = buffer + *result; 892 const char *prefix; 893 int prefix_len; 894 int total_len; 895 896 switch(type) { 897 case OCFS2_XATTR_INDEX_USER: 898 if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 899 return 0; 900 break; 901 902 case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS: 903 case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT: 904 if (!(sb->s_flags & MS_POSIXACL)) 905 return 0; 906 break; 907 908 case OCFS2_XATTR_INDEX_TRUSTED: 909 if (!capable(CAP_SYS_ADMIN)) 910 return 0; 911 break; 912 } 913 914 prefix = ocfs2_xattr_prefix(type); 915 if (!prefix) 916 return 0; 917 prefix_len = strlen(prefix); 918 
total_len = prefix_len + name_len + 1; 919 *result += total_len; 920 921 /* we are just looking for how big our buffer needs to be */ 922 if (!size) 923 return 0; 924 925 if (*result > size) 926 return -ERANGE; 927 928 memcpy(p, prefix, prefix_len); 929 memcpy(p + prefix_len, name, name_len); 930 p[prefix_len + name_len] = '\0'; 931 932 return 0; 933 } 934 935 static int ocfs2_xattr_list_entries(struct inode *inode, 936 struct ocfs2_xattr_header *header, 937 char *buffer, size_t buffer_size) 938 { 939 size_t result = 0; 940 int i, type, ret; 941 const char *name; 942 943 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 944 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 945 type = ocfs2_xattr_get_type(entry); 946 name = (const char *)header + 947 le16_to_cpu(entry->xe_name_offset); 948 949 ret = ocfs2_xattr_list_entry(inode->i_sb, 950 buffer, buffer_size, 951 &result, type, name, 952 entry->xe_name_len); 953 if (ret) 954 return ret; 955 } 956 957 return result; 958 } 959 960 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 961 struct ocfs2_dinode *di) 962 { 963 struct ocfs2_xattr_header *xh; 964 int i; 965 966 xh = (struct ocfs2_xattr_header *) 967 ((void *)di + inode->i_sb->s_blocksize - 968 le16_to_cpu(di->i_xattr_inline_size)); 969 970 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 971 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 972 return 1; 973 974 return 0; 975 } 976 977 static int ocfs2_xattr_ibody_list(struct inode *inode, 978 struct ocfs2_dinode *di, 979 char *buffer, 980 size_t buffer_size) 981 { 982 struct ocfs2_xattr_header *header = NULL; 983 struct ocfs2_inode_info *oi = OCFS2_I(inode); 984 int ret = 0; 985 986 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 987 return ret; 988 989 header = (struct ocfs2_xattr_header *) 990 ((void *)di + inode->i_sb->s_blocksize - 991 le16_to_cpu(di->i_xattr_inline_size)); 992 993 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 994 995 return ret; 996 } 997 998 
static int ocfs2_xattr_block_list(struct inode *inode, 999 struct ocfs2_dinode *di, 1000 char *buffer, 1001 size_t buffer_size) 1002 { 1003 struct buffer_head *blk_bh = NULL; 1004 struct ocfs2_xattr_block *xb; 1005 int ret = 0; 1006 1007 if (!di->i_xattr_loc) 1008 return ret; 1009 1010 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 1011 &blk_bh); 1012 if (ret < 0) { 1013 mlog_errno(ret); 1014 return ret; 1015 } 1016 1017 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1018 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1019 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 1020 ret = ocfs2_xattr_list_entries(inode, header, 1021 buffer, buffer_size); 1022 } else 1023 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1024 buffer, buffer_size); 1025 1026 brelse(blk_bh); 1027 1028 return ret; 1029 } 1030 1031 ssize_t ocfs2_listxattr(struct dentry *dentry, 1032 char *buffer, 1033 size_t size) 1034 { 1035 int ret = 0, i_ret = 0, b_ret = 0; 1036 struct buffer_head *di_bh = NULL; 1037 struct ocfs2_dinode *di = NULL; 1038 struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry)); 1039 1040 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) 1041 return -EOPNOTSUPP; 1042 1043 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1044 return ret; 1045 1046 ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0); 1047 if (ret < 0) { 1048 mlog_errno(ret); 1049 return ret; 1050 } 1051 1052 di = (struct ocfs2_dinode *)di_bh->b_data; 1053 1054 down_read(&oi->ip_xattr_sem); 1055 i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size); 1056 if (i_ret < 0) 1057 b_ret = 0; 1058 else { 1059 if (buffer) { 1060 buffer += i_ret; 1061 size -= i_ret; 1062 } 1063 b_ret = ocfs2_xattr_block_list(d_inode(dentry), di, 1064 buffer, size); 1065 if (b_ret < 0) 1066 i_ret = 0; 1067 } 1068 up_read(&oi->ip_xattr_sem); 1069 ocfs2_inode_unlock(d_inode(dentry), 0); 1070 1071 brelse(di_bh); 1072 1073 return i_ret + b_ret; 1074 } 1075 1076 static int 
ocfs2_xattr_find_entry(int name_index, 1077 const char *name, 1078 struct ocfs2_xattr_search *xs) 1079 { 1080 struct ocfs2_xattr_entry *entry; 1081 size_t name_len; 1082 int i, cmp = 1; 1083 1084 if (name == NULL) 1085 return -EINVAL; 1086 1087 name_len = strlen(name); 1088 entry = xs->here; 1089 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 1090 cmp = name_index - ocfs2_xattr_get_type(entry); 1091 if (!cmp) 1092 cmp = name_len - entry->xe_name_len; 1093 if (!cmp) 1094 cmp = memcmp(name, (xs->base + 1095 le16_to_cpu(entry->xe_name_offset)), 1096 name_len); 1097 if (cmp == 0) 1098 break; 1099 entry += 1; 1100 } 1101 xs->here = entry; 1102 1103 return cmp ? -ENODATA : 0; 1104 } 1105 1106 static int ocfs2_xattr_get_value_outside(struct inode *inode, 1107 struct ocfs2_xattr_value_root *xv, 1108 void *buffer, 1109 size_t len) 1110 { 1111 u32 cpos, p_cluster, num_clusters, bpc, clusters; 1112 u64 blkno; 1113 int i, ret = 0; 1114 size_t cplen, blocksize; 1115 struct buffer_head *bh = NULL; 1116 struct ocfs2_extent_list *el; 1117 1118 el = &xv->xr_list; 1119 clusters = le32_to_cpu(xv->xr_clusters); 1120 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1121 blocksize = inode->i_sb->s_blocksize; 1122 1123 cpos = 0; 1124 while (cpos < clusters) { 1125 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1126 &num_clusters, el, NULL); 1127 if (ret) { 1128 mlog_errno(ret); 1129 goto out; 1130 } 1131 1132 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1133 /* Copy ocfs2_xattr_value */ 1134 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1135 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1136 &bh, NULL); 1137 if (ret) { 1138 mlog_errno(ret); 1139 goto out; 1140 } 1141 1142 cplen = len >= blocksize ? 
blocksize : len; 1143 memcpy(buffer, bh->b_data, cplen); 1144 len -= cplen; 1145 buffer += cplen; 1146 1147 brelse(bh); 1148 bh = NULL; 1149 if (len == 0) 1150 break; 1151 } 1152 cpos += num_clusters; 1153 } 1154 out: 1155 return ret; 1156 } 1157 1158 static int ocfs2_xattr_ibody_get(struct inode *inode, 1159 int name_index, 1160 const char *name, 1161 void *buffer, 1162 size_t buffer_size, 1163 struct ocfs2_xattr_search *xs) 1164 { 1165 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1166 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1167 struct ocfs2_xattr_value_root *xv; 1168 size_t size; 1169 int ret = 0; 1170 1171 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 1172 return -ENODATA; 1173 1174 xs->end = (void *)di + inode->i_sb->s_blocksize; 1175 xs->header = (struct ocfs2_xattr_header *) 1176 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 1177 xs->base = (void *)xs->header; 1178 xs->here = xs->header->xh_entries; 1179 1180 ret = ocfs2_xattr_find_entry(name_index, name, xs); 1181 if (ret) 1182 return ret; 1183 size = le64_to_cpu(xs->here->xe_value_size); 1184 if (buffer) { 1185 if (size > buffer_size) 1186 return -ERANGE; 1187 if (ocfs2_xattr_is_local(xs->here)) { 1188 memcpy(buffer, (void *)xs->base + 1189 le16_to_cpu(xs->here->xe_name_offset) + 1190 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); 1191 } else { 1192 xv = (struct ocfs2_xattr_value_root *) 1193 (xs->base + le16_to_cpu( 1194 xs->here->xe_name_offset) + 1195 OCFS2_XATTR_SIZE(xs->here->xe_name_len)); 1196 ret = ocfs2_xattr_get_value_outside(inode, xv, 1197 buffer, size); 1198 if (ret < 0) { 1199 mlog_errno(ret); 1200 return ret; 1201 } 1202 } 1203 } 1204 1205 return size; 1206 } 1207 1208 static int ocfs2_xattr_block_get(struct inode *inode, 1209 int name_index, 1210 const char *name, 1211 void *buffer, 1212 size_t buffer_size, 1213 struct ocfs2_xattr_search *xs) 1214 { 1215 struct ocfs2_xattr_block *xb; 1216 struct ocfs2_xattr_value_root *xv; 1217 size_t size; 1218 
int ret = -ENODATA, name_offset, name_len, i; 1219 int uninitialized_var(block_off); 1220 1221 xs->bucket = ocfs2_xattr_bucket_new(inode); 1222 if (!xs->bucket) { 1223 ret = -ENOMEM; 1224 mlog_errno(ret); 1225 goto cleanup; 1226 } 1227 1228 ret = ocfs2_xattr_block_find(inode, name_index, name, xs); 1229 if (ret) { 1230 mlog_errno(ret); 1231 goto cleanup; 1232 } 1233 1234 if (xs->not_found) { 1235 ret = -ENODATA; 1236 goto cleanup; 1237 } 1238 1239 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 1240 size = le64_to_cpu(xs->here->xe_value_size); 1241 if (buffer) { 1242 ret = -ERANGE; 1243 if (size > buffer_size) 1244 goto cleanup; 1245 1246 name_offset = le16_to_cpu(xs->here->xe_name_offset); 1247 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); 1248 i = xs->here - xs->header->xh_entries; 1249 1250 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1251 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 1252 bucket_xh(xs->bucket), 1253 i, 1254 &block_off, 1255 &name_offset); 1256 if (ret) { 1257 mlog_errno(ret); 1258 goto cleanup; 1259 } 1260 xs->base = bucket_block(xs->bucket, block_off); 1261 } 1262 if (ocfs2_xattr_is_local(xs->here)) { 1263 memcpy(buffer, (void *)xs->base + 1264 name_offset + name_len, size); 1265 } else { 1266 xv = (struct ocfs2_xattr_value_root *) 1267 (xs->base + name_offset + name_len); 1268 ret = ocfs2_xattr_get_value_outside(inode, xv, 1269 buffer, size); 1270 if (ret < 0) { 1271 mlog_errno(ret); 1272 goto cleanup; 1273 } 1274 } 1275 } 1276 ret = size; 1277 cleanup: 1278 ocfs2_xattr_bucket_free(xs->bucket); 1279 1280 brelse(xs->xattr_bh); 1281 xs->xattr_bh = NULL; 1282 return ret; 1283 } 1284 1285 int ocfs2_xattr_get_nolock(struct inode *inode, 1286 struct buffer_head *di_bh, 1287 int name_index, 1288 const char *name, 1289 void *buffer, 1290 size_t buffer_size) 1291 { 1292 int ret; 1293 struct ocfs2_dinode *di = NULL; 1294 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1295 struct ocfs2_xattr_search xis = { 1296 .not_found = 
-ENODATA, 1297 }; 1298 struct ocfs2_xattr_search xbs = { 1299 .not_found = -ENODATA, 1300 }; 1301 1302 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 1303 return -EOPNOTSUPP; 1304 1305 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1306 return -ENODATA; 1307 1308 xis.inode_bh = xbs.inode_bh = di_bh; 1309 di = (struct ocfs2_dinode *)di_bh->b_data; 1310 1311 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 1312 buffer_size, &xis); 1313 if (ret == -ENODATA && di->i_xattr_loc) 1314 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1315 buffer_size, &xbs); 1316 1317 return ret; 1318 } 1319 1320 /* ocfs2_xattr_get() 1321 * 1322 * Copy an extended attribute into the buffer provided. 1323 * Buffer is NULL to compute the size of buffer required. 1324 */ 1325 static int ocfs2_xattr_get(struct inode *inode, 1326 int name_index, 1327 const char *name, 1328 void *buffer, 1329 size_t buffer_size) 1330 { 1331 int ret; 1332 struct buffer_head *di_bh = NULL; 1333 1334 ret = ocfs2_inode_lock(inode, &di_bh, 0); 1335 if (ret < 0) { 1336 mlog_errno(ret); 1337 return ret; 1338 } 1339 down_read(&OCFS2_I(inode)->ip_xattr_sem); 1340 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, 1341 name, buffer, buffer_size); 1342 up_read(&OCFS2_I(inode)->ip_xattr_sem); 1343 1344 ocfs2_inode_unlock(inode, 0); 1345 1346 brelse(di_bh); 1347 1348 return ret; 1349 } 1350 1351 static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1352 handle_t *handle, 1353 struct ocfs2_xattr_value_buf *vb, 1354 const void *value, 1355 int value_len) 1356 { 1357 int ret = 0, i, cp_len; 1358 u16 blocksize = inode->i_sb->s_blocksize; 1359 u32 p_cluster, num_clusters; 1360 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1361 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1362 u64 blkno; 1363 struct buffer_head *bh = NULL; 1364 unsigned int ext_flags; 1365 struct ocfs2_xattr_value_root *xv = vb->vb_xv; 1366 1367 BUG_ON(clusters > 
le32_to_cpu(xv->xr_clusters)); 1368 1369 while (cpos < clusters) { 1370 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1371 &num_clusters, &xv->xr_list, 1372 &ext_flags); 1373 if (ret) { 1374 mlog_errno(ret); 1375 goto out; 1376 } 1377 1378 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 1379 1380 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1381 1382 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1383 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1384 &bh, NULL); 1385 if (ret) { 1386 mlog_errno(ret); 1387 goto out; 1388 } 1389 1390 ret = ocfs2_journal_access(handle, 1391 INODE_CACHE(inode), 1392 bh, 1393 OCFS2_JOURNAL_ACCESS_WRITE); 1394 if (ret < 0) { 1395 mlog_errno(ret); 1396 goto out; 1397 } 1398 1399 cp_len = value_len > blocksize ? blocksize : value_len; 1400 memcpy(bh->b_data, value, cp_len); 1401 value_len -= cp_len; 1402 value += cp_len; 1403 if (cp_len < blocksize) 1404 memset(bh->b_data + cp_len, 0, 1405 blocksize - cp_len); 1406 1407 ocfs2_journal_dirty(handle, bh); 1408 brelse(bh); 1409 bh = NULL; 1410 1411 /* 1412 * XXX: do we need to empty all the following 1413 * blocks in this cluster? 
1414 */ 1415 if (!value_len) 1416 break; 1417 } 1418 cpos += num_clusters; 1419 } 1420 out: 1421 brelse(bh); 1422 1423 return ret; 1424 } 1425 1426 static int ocfs2_xa_check_space_helper(int needed_space, int free_start, 1427 int num_entries) 1428 { 1429 int free_space; 1430 1431 if (!needed_space) 1432 return 0; 1433 1434 free_space = free_start - 1435 sizeof(struct ocfs2_xattr_header) - 1436 (num_entries * sizeof(struct ocfs2_xattr_entry)) - 1437 OCFS2_XATTR_HEADER_GAP; 1438 if (free_space < 0) 1439 return -EIO; 1440 if (free_space < needed_space) 1441 return -ENOSPC; 1442 1443 return 0; 1444 } 1445 1446 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, 1447 int type) 1448 { 1449 return loc->xl_ops->xlo_journal_access(handle, loc, type); 1450 } 1451 1452 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) 1453 { 1454 loc->xl_ops->xlo_journal_dirty(handle, loc); 1455 } 1456 1457 /* Give a pointer into the storage for the given offset */ 1458 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) 1459 { 1460 BUG_ON(offset >= loc->xl_size); 1461 return loc->xl_ops->xlo_offset_pointer(loc, offset); 1462 } 1463 1464 /* 1465 * Wipe the name+value pair and allow the storage to reclaim it. This 1466 * must be followed by either removal of the entry or a call to 1467 * ocfs2_xa_add_namevalue(). 1468 */ 1469 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) 1470 { 1471 loc->xl_ops->xlo_wipe_namevalue(loc); 1472 } 1473 1474 /* 1475 * Find lowest offset to a name+value pair. This is the start of our 1476 * downward-growing free space. 1477 */ 1478 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) 1479 { 1480 return loc->xl_ops->xlo_get_free_start(loc); 1481 } 1482 1483 /* Can we reuse loc->xl_entry for xi? 
*/ 1484 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, 1485 struct ocfs2_xattr_info *xi) 1486 { 1487 return loc->xl_ops->xlo_can_reuse(loc, xi); 1488 } 1489 1490 /* How much free space is needed to set the new value */ 1491 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, 1492 struct ocfs2_xattr_info *xi) 1493 { 1494 return loc->xl_ops->xlo_check_space(loc, xi); 1495 } 1496 1497 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1498 { 1499 loc->xl_ops->xlo_add_entry(loc, name_hash); 1500 loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); 1501 /* 1502 * We can't leave the new entry's xe_name_offset at zero or 1503 * add_namevalue() will go nuts. We set it to the size of our 1504 * storage so that it can never be less than any other entry. 1505 */ 1506 loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); 1507 } 1508 1509 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, 1510 struct ocfs2_xattr_info *xi) 1511 { 1512 int size = namevalue_size_xi(xi); 1513 int nameval_offset; 1514 char *nameval_buf; 1515 1516 loc->xl_ops->xlo_add_namevalue(loc, size); 1517 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1518 loc->xl_entry->xe_name_len = xi->xi_name_len; 1519 ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); 1520 ocfs2_xattr_set_local(loc->xl_entry, 1521 xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); 1522 1523 nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1524 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 1525 memset(nameval_buf, 0, size); 1526 memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); 1527 } 1528 1529 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, 1530 struct ocfs2_xattr_value_buf *vb) 1531 { 1532 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1533 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1534 1535 /* Value bufs are for value trees */ 1536 BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); 1537 
BUG_ON(namevalue_size_xe(loc->xl_entry) != 1538 (name_size + OCFS2_XATTR_ROOT_SIZE)); 1539 1540 loc->xl_ops->xlo_fill_value_buf(loc, vb); 1541 vb->vb_xv = 1542 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, 1543 nameval_offset + 1544 name_size); 1545 } 1546 1547 static int ocfs2_xa_block_journal_access(handle_t *handle, 1548 struct ocfs2_xa_loc *loc, int type) 1549 { 1550 struct buffer_head *bh = loc->xl_storage; 1551 ocfs2_journal_access_func access; 1552 1553 if (loc->xl_size == (bh->b_size - 1554 offsetof(struct ocfs2_xattr_block, 1555 xb_attrs.xb_header))) 1556 access = ocfs2_journal_access_xb; 1557 else 1558 access = ocfs2_journal_access_di; 1559 return access(handle, INODE_CACHE(loc->xl_inode), bh, type); 1560 } 1561 1562 static void ocfs2_xa_block_journal_dirty(handle_t *handle, 1563 struct ocfs2_xa_loc *loc) 1564 { 1565 struct buffer_head *bh = loc->xl_storage; 1566 1567 ocfs2_journal_dirty(handle, bh); 1568 } 1569 1570 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, 1571 int offset) 1572 { 1573 return (char *)loc->xl_header + offset; 1574 } 1575 1576 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, 1577 struct ocfs2_xattr_info *xi) 1578 { 1579 /* 1580 * Block storage is strict. If the sizes aren't exact, we will 1581 * remove the old one and reinsert the new. 
1582 */ 1583 return namevalue_size_xe(loc->xl_entry) == 1584 namevalue_size_xi(xi); 1585 } 1586 1587 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) 1588 { 1589 struct ocfs2_xattr_header *xh = loc->xl_header; 1590 int i, count = le16_to_cpu(xh->xh_count); 1591 int offset, free_start = loc->xl_size; 1592 1593 for (i = 0; i < count; i++) { 1594 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1595 if (offset < free_start) 1596 free_start = offset; 1597 } 1598 1599 return free_start; 1600 } 1601 1602 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, 1603 struct ocfs2_xattr_info *xi) 1604 { 1605 int count = le16_to_cpu(loc->xl_header->xh_count); 1606 int free_start = ocfs2_xa_get_free_start(loc); 1607 int needed_space = ocfs2_xi_entry_usage(xi); 1608 1609 /* 1610 * Block storage will reclaim the original entry before inserting 1611 * the new value, so we only need the difference. If the new 1612 * entry is smaller than the old one, we don't need anything. 1613 */ 1614 if (loc->xl_entry) { 1615 /* Don't need space if we're reusing! */ 1616 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1617 needed_space = 0; 1618 else 1619 needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); 1620 } 1621 if (needed_space < 0) 1622 needed_space = 0; 1623 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1624 } 1625 1626 /* 1627 * Block storage for xattrs keeps the name+value pairs compacted. When 1628 * we remove one, we have to shift any that preceded it towards the end. 
1629 */ 1630 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) 1631 { 1632 int i, offset; 1633 int namevalue_offset, first_namevalue_offset, namevalue_size; 1634 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1635 struct ocfs2_xattr_header *xh = loc->xl_header; 1636 int count = le16_to_cpu(xh->xh_count); 1637 1638 namevalue_offset = le16_to_cpu(entry->xe_name_offset); 1639 namevalue_size = namevalue_size_xe(entry); 1640 first_namevalue_offset = ocfs2_xa_get_free_start(loc); 1641 1642 /* Shift the name+value pairs */ 1643 memmove((char *)xh + first_namevalue_offset + namevalue_size, 1644 (char *)xh + first_namevalue_offset, 1645 namevalue_offset - first_namevalue_offset); 1646 memset((char *)xh + first_namevalue_offset, 0, namevalue_size); 1647 1648 /* Now tell xh->xh_entries about it */ 1649 for (i = 0; i < count; i++) { 1650 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1651 if (offset <= namevalue_offset) 1652 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1653 namevalue_size); 1654 } 1655 1656 /* 1657 * Note that we don't update xh_free_start or xh_name_value_len 1658 * because they're not used in block-stored xattrs. 
1659 */ 1660 } 1661 1662 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1663 { 1664 int count = le16_to_cpu(loc->xl_header->xh_count); 1665 loc->xl_entry = &(loc->xl_header->xh_entries[count]); 1666 le16_add_cpu(&loc->xl_header->xh_count, 1); 1667 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1668 } 1669 1670 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1671 { 1672 int free_start = ocfs2_xa_get_free_start(loc); 1673 1674 loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); 1675 } 1676 1677 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, 1678 struct ocfs2_xattr_value_buf *vb) 1679 { 1680 struct buffer_head *bh = loc->xl_storage; 1681 1682 if (loc->xl_size == (bh->b_size - 1683 offsetof(struct ocfs2_xattr_block, 1684 xb_attrs.xb_header))) 1685 vb->vb_access = ocfs2_journal_access_xb; 1686 else 1687 vb->vb_access = ocfs2_journal_access_di; 1688 vb->vb_bh = bh; 1689 } 1690 1691 /* 1692 * Operations for xattrs stored in blocks. This includes inline inode 1693 * storage and unindexed ocfs2_xattr_blocks. 
1694 */ 1695 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { 1696 .xlo_journal_access = ocfs2_xa_block_journal_access, 1697 .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, 1698 .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, 1699 .xlo_check_space = ocfs2_xa_block_check_space, 1700 .xlo_can_reuse = ocfs2_xa_block_can_reuse, 1701 .xlo_get_free_start = ocfs2_xa_block_get_free_start, 1702 .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, 1703 .xlo_add_entry = ocfs2_xa_block_add_entry, 1704 .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, 1705 .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, 1706 }; 1707 1708 static int ocfs2_xa_bucket_journal_access(handle_t *handle, 1709 struct ocfs2_xa_loc *loc, int type) 1710 { 1711 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1712 1713 return ocfs2_xattr_bucket_journal_access(handle, bucket, type); 1714 } 1715 1716 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, 1717 struct ocfs2_xa_loc *loc) 1718 { 1719 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1720 1721 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 1722 } 1723 1724 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, 1725 int offset) 1726 { 1727 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1728 int block, block_offset; 1729 1730 /* The header is at the front of the bucket */ 1731 block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; 1732 block_offset = offset % loc->xl_inode->i_sb->s_blocksize; 1733 1734 return bucket_block(bucket, block) + block_offset; 1735 } 1736 1737 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, 1738 struct ocfs2_xattr_info *xi) 1739 { 1740 return namevalue_size_xe(loc->xl_entry) >= 1741 namevalue_size_xi(xi); 1742 } 1743 1744 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) 1745 { 1746 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1747 return le16_to_cpu(bucket_xh(bucket)->xh_free_start); 1748 } 1749 1750 static 
int ocfs2_bucket_align_free_start(struct super_block *sb, 1751 int free_start, int size) 1752 { 1753 /* 1754 * We need to make sure that the name+value pair fits within 1755 * one block. 1756 */ 1757 if (((free_start - size) >> sb->s_blocksize_bits) != 1758 ((free_start - 1) >> sb->s_blocksize_bits)) 1759 free_start -= free_start % sb->s_blocksize; 1760 1761 return free_start; 1762 } 1763 1764 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, 1765 struct ocfs2_xattr_info *xi) 1766 { 1767 int rc; 1768 int count = le16_to_cpu(loc->xl_header->xh_count); 1769 int free_start = ocfs2_xa_get_free_start(loc); 1770 int needed_space = ocfs2_xi_entry_usage(xi); 1771 int size = namevalue_size_xi(xi); 1772 struct super_block *sb = loc->xl_inode->i_sb; 1773 1774 /* 1775 * Bucket storage does not reclaim name+value pairs it cannot 1776 * reuse. They live as holes until the bucket fills, and then 1777 * the bucket is defragmented. However, the bucket can reclaim 1778 * the ocfs2_xattr_entry. 1779 */ 1780 if (loc->xl_entry) { 1781 /* Don't need space if we're reusing! */ 1782 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1783 needed_space = 0; 1784 else 1785 needed_space -= sizeof(struct ocfs2_xattr_entry); 1786 } 1787 BUG_ON(needed_space < 0); 1788 1789 if (free_start < size) { 1790 if (needed_space) 1791 return -ENOSPC; 1792 } else { 1793 /* 1794 * First we check if it would fit in the first place. 1795 * Below, we align the free start to a block. This may 1796 * slide us below the minimum gap. By checking unaligned 1797 * first, we avoid that error. 
1798 */ 1799 rc = ocfs2_xa_check_space_helper(needed_space, free_start, 1800 count); 1801 if (rc) 1802 return rc; 1803 free_start = ocfs2_bucket_align_free_start(sb, free_start, 1804 size); 1805 } 1806 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1807 } 1808 1809 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) 1810 { 1811 le16_add_cpu(&loc->xl_header->xh_name_value_len, 1812 -namevalue_size_xe(loc->xl_entry)); 1813 } 1814 1815 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1816 { 1817 struct ocfs2_xattr_header *xh = loc->xl_header; 1818 int count = le16_to_cpu(xh->xh_count); 1819 int low = 0, high = count - 1, tmp; 1820 struct ocfs2_xattr_entry *tmp_xe; 1821 1822 /* 1823 * We keep buckets sorted by name_hash, so we need to find 1824 * our insert place. 1825 */ 1826 while (low <= high && count) { 1827 tmp = (low + high) / 2; 1828 tmp_xe = &xh->xh_entries[tmp]; 1829 1830 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 1831 low = tmp + 1; 1832 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) 1833 high = tmp - 1; 1834 else { 1835 low = tmp; 1836 break; 1837 } 1838 } 1839 1840 if (low != count) 1841 memmove(&xh->xh_entries[low + 1], 1842 &xh->xh_entries[low], 1843 ((count - low) * sizeof(struct ocfs2_xattr_entry))); 1844 1845 le16_add_cpu(&xh->xh_count, 1); 1846 loc->xl_entry = &xh->xh_entries[low]; 1847 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1848 } 1849 1850 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1851 { 1852 int free_start = ocfs2_xa_get_free_start(loc); 1853 struct ocfs2_xattr_header *xh = loc->xl_header; 1854 struct super_block *sb = loc->xl_inode->i_sb; 1855 int nameval_offset; 1856 1857 free_start = ocfs2_bucket_align_free_start(sb, free_start, size); 1858 nameval_offset = free_start - size; 1859 loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); 1860 xh->xh_free_start = cpu_to_le16(nameval_offset); 1861 
le16_add_cpu(&xh->xh_name_value_len, size); 1862 1863 } 1864 1865 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, 1866 struct ocfs2_xattr_value_buf *vb) 1867 { 1868 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1869 struct super_block *sb = loc->xl_inode->i_sb; 1870 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1871 int size = namevalue_size_xe(loc->xl_entry); 1872 int block_offset = nameval_offset >> sb->s_blocksize_bits; 1873 1874 /* Values are not allowed to straddle block boundaries */ 1875 BUG_ON(block_offset != 1876 ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); 1877 /* We expect the bucket to be filled in */ 1878 BUG_ON(!bucket->bu_bhs[block_offset]); 1879 1880 vb->vb_access = ocfs2_journal_access; 1881 vb->vb_bh = bucket->bu_bhs[block_offset]; 1882 } 1883 1884 /* Operations for xattrs stored in buckets. */ 1885 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { 1886 .xlo_journal_access = ocfs2_xa_bucket_journal_access, 1887 .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, 1888 .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, 1889 .xlo_check_space = ocfs2_xa_bucket_check_space, 1890 .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, 1891 .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, 1892 .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, 1893 .xlo_add_entry = ocfs2_xa_bucket_add_entry, 1894 .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, 1895 .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, 1896 }; 1897 1898 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) 1899 { 1900 struct ocfs2_xattr_value_buf vb; 1901 1902 if (ocfs2_xattr_is_local(loc->xl_entry)) 1903 return 0; 1904 1905 ocfs2_xa_fill_value_buf(loc, &vb); 1906 return le32_to_cpu(vb.vb_xv->xr_clusters); 1907 } 1908 1909 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, 1910 struct ocfs2_xattr_set_ctxt *ctxt) 1911 { 1912 int trunc_rc, access_rc; 1913 struct 
ocfs2_xattr_value_buf vb; 1914 1915 ocfs2_xa_fill_value_buf(loc, &vb); 1916 trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1917 ctxt); 1918 1919 /* 1920 * The caller of ocfs2_xa_value_truncate() has already called 1921 * ocfs2_xa_journal_access on the loc. However, The truncate code 1922 * calls ocfs2_extend_trans(). This may commit the previous 1923 * transaction and open a new one. If this is a bucket, truncate 1924 * could leave only vb->vb_bh set up for journaling. Meanwhile, 1925 * the caller is expecting to dirty the entire bucket. So we must 1926 * reset the journal work. We do this even if truncate has failed, 1927 * as it could have failed after committing the extend. 1928 */ 1929 access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1930 OCFS2_JOURNAL_ACCESS_WRITE); 1931 1932 /* Errors in truncate take precedence */ 1933 return trunc_rc ? trunc_rc : access_rc; 1934 } 1935 1936 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1937 { 1938 int index, count; 1939 struct ocfs2_xattr_header *xh = loc->xl_header; 1940 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1941 1942 ocfs2_xa_wipe_namevalue(loc); 1943 loc->xl_entry = NULL; 1944 1945 le16_add_cpu(&xh->xh_count, -1); 1946 count = le16_to_cpu(xh->xh_count); 1947 1948 /* 1949 * Only zero out the entry if there are more remaining. This is 1950 * important for an empty bucket, as it keeps track of the 1951 * bucket's hash value. It doesn't hurt empty block storage. 
1952 */ 1953 if (count) { 1954 index = ((char *)entry - (char *)&xh->xh_entries) / 1955 sizeof(struct ocfs2_xattr_entry); 1956 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1957 (count - index) * sizeof(struct ocfs2_xattr_entry)); 1958 memset(&xh->xh_entries[count], 0, 1959 sizeof(struct ocfs2_xattr_entry)); 1960 } 1961 } 1962 1963 /* 1964 * If we have a problem adjusting the size of an external value during 1965 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 1966 * in an intermediate state. For example, the value may be partially 1967 * truncated. 1968 * 1969 * If the value tree hasn't changed, the extend/truncate went nowhere. 1970 * We have nothing to do. The caller can treat it as a straight error. 1971 * 1972 * If the value tree got partially truncated, we now have a corrupted 1973 * extended attribute. We're going to wipe its entry and leak the 1974 * clusters. Better to leak some storage than leave a corrupt entry. 1975 * 1976 * If the value tree grew, it obviously didn't grow enough for the 1977 * new entry. We're not going to try and reclaim those clusters either. 1978 * If there was already an external value there (orig_clusters != 0), 1979 * the new clusters are attached safely and we can just leave the old 1980 * value in place. If there was no external value there, we remove 1981 * the entry. 1982 * 1983 * This way, the xattr block we store in the journal will be consistent. 1984 * If the size change broke because of the journal, no changes will hit 1985 * disk anyway. 1986 */ 1987 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 1988 const char *what, 1989 unsigned int orig_clusters) 1990 { 1991 unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 1992 char *nameval_buf = ocfs2_xa_offset_pointer(loc, 1993 le16_to_cpu(loc->xl_entry->xe_name_offset)); 1994 1995 if (new_clusters < orig_clusters) { 1996 mlog(ML_ERROR, 1997 "Partial truncate while %s xattr %.*s. 
Leaking " 1998 "%u clusters and removing the entry\n", 1999 what, loc->xl_entry->xe_name_len, nameval_buf, 2000 orig_clusters - new_clusters); 2001 ocfs2_xa_remove_entry(loc); 2002 } else if (!orig_clusters) { 2003 mlog(ML_ERROR, 2004 "Unable to allocate an external value for xattr " 2005 "%.*s safely. Leaking %u clusters and removing the " 2006 "entry\n", 2007 loc->xl_entry->xe_name_len, nameval_buf, 2008 new_clusters - orig_clusters); 2009 ocfs2_xa_remove_entry(loc); 2010 } else if (new_clusters > orig_clusters) 2011 mlog(ML_ERROR, 2012 "Unable to grow xattr %.*s safely. %u new clusters " 2013 "have been added, but the value will not be " 2014 "modified\n", 2015 loc->xl_entry->xe_name_len, nameval_buf, 2016 new_clusters - orig_clusters); 2017 } 2018 2019 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 2020 struct ocfs2_xattr_set_ctxt *ctxt) 2021 { 2022 int rc = 0; 2023 unsigned int orig_clusters; 2024 2025 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2026 orig_clusters = ocfs2_xa_value_clusters(loc); 2027 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2028 if (rc) { 2029 mlog_errno(rc); 2030 /* 2031 * Since this is remove, we can return 0 if 2032 * ocfs2_xa_cleanup_value_truncate() is going to 2033 * wipe the entry anyway. So we check the 2034 * cluster count as well. 2035 */ 2036 if (orig_clusters != ocfs2_xa_value_clusters(loc)) 2037 rc = 0; 2038 ocfs2_xa_cleanup_value_truncate(loc, "removing", 2039 orig_clusters); 2040 if (rc) 2041 goto out; 2042 } 2043 } 2044 2045 ocfs2_xa_remove_entry(loc); 2046 2047 out: 2048 return rc; 2049 } 2050 2051 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 2052 { 2053 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 2054 char *nameval_buf; 2055 2056 nameval_buf = ocfs2_xa_offset_pointer(loc, 2057 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2058 memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 2059 } 2060 2061 /* 2062 * Take an existing entry and make it ready for the new value. 
This
 * won't allocate space, but it may free space. It should be ready for
 * ocfs2_xa_prepare_entry() to finish the work.
 *
 * Four transitions are handled, depending on whether the old entry (xe)
 * and the new value (xi) are stored locally:
 *   local -> local:       the old value bytes are zeroed.
 *   local -> external:    the old value bytes are zeroed and an empty
 *                         value root is installed.
 *   external -> local:    the value tree is truncated to zero, then the
 *                         region after the name is zeroed.
 *   external -> external: the value tree is truncated only if the new
 *                         value is smaller than the old one.
 *
 * On success the entry's value size and local flag are updated for xi.
 * On a truncate failure ocfs2_xa_cleanup_value_truncate() is run and
 * the error is returned with the entry fields left unchanged.
 */
static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi,
				struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
	unsigned int orig_clusters;
	char *nameval_buf;
	int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
	int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;

	/* Reuse only makes sense if the padded name sizes match. */
	BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
	       name_size);

	nameval_buf = ocfs2_xa_offset_pointer(loc,
				le16_to_cpu(loc->xl_entry->xe_name_offset));
	if (xe_local) {
		/* Clear the old local value that follows the name. */
		memset(nameval_buf + name_size, 0,
		       namevalue_size_xe(loc->xl_entry) - name_size);
		if (!xi_local)
			ocfs2_xa_install_value_root(loc);
	} else {
		/* Remembered so a failed truncate can be cleaned up. */
		orig_clusters = ocfs2_xa_value_clusters(loc);
		if (xi_local) {
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc < 0)
				mlog_errno(rc);
			else
				memset(nameval_buf + name_size, 0,
				       namevalue_size_xe(loc->xl_entry) -
				       name_size);
		} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
			   xi->xi_value_len) {
			/* Shrinking an external value; growth happens later. */
			rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
						     ctxt);
			if (rc < 0)
				mlog_errno(rc);
		}

		if (rc) {
			ocfs2_xa_cleanup_value_truncate(loc, "reusing",
							orig_clusters);
			goto out;
		}
	}

	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
	ocfs2_xattr_set_local(loc->xl_entry, xi_local);

out:
	return rc;
}

/*
 * Prepares loc->xl_entry to receive the new xattr. This includes
 * properly setting up the name+value pair region. If loc->xl_entry
 * already exists, it will take care of modifying it appropriately.
 *
 * Note that this modifies the data. You did journal_access already,
 * right?
*/
/*
 * Returns 0 on success, the error from ocfs2_xa_check_space() if the
 * new xattr does not fit, or the error from resizing an external
 * value. When growing the external value fails, ctxt->set_abort is
 * set to 1.
 */
static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
				  struct ocfs2_xattr_info *xi,
				  u32 name_hash,
				  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	unsigned int orig_clusters;
	__le64 orig_value_size = 0;

	rc = ocfs2_xa_check_space(loc, xi);
	if (rc)
		goto out;

	if (loc->xl_entry) {
		if (ocfs2_xa_can_reuse_entry(loc, xi)) {
			/* Saved so a failed grow can restore it below. */
			orig_value_size = loc->xl_entry->xe_value_size;
			rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
			if (rc)
				goto out;
			goto alloc_value;
		}

		/* Can't reuse: drop any external value, then the pair. */
		if (!ocfs2_xattr_is_local(loc->xl_entry)) {
			orig_clusters = ocfs2_xa_value_clusters(loc);
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc) {
				mlog_errno(rc);
				ocfs2_xa_cleanup_value_truncate(loc,
								"overwriting",
								orig_clusters);
				goto out;
			}
		}
		ocfs2_xa_wipe_namevalue(loc);
	} else
		ocfs2_xa_add_entry(loc, name_hash);

	/*
	 * If we get here, we have a blank entry. Fill it. We grow our
	 * name+value pair back from the end.
	 */
	ocfs2_xa_add_namevalue(loc, xi);
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		ocfs2_xa_install_value_root(loc);

alloc_value:
	/* External values need their tree sized for the new length. */
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		orig_clusters = ocfs2_xa_value_clusters(loc);
		rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
		if (rc < 0) {
			ctxt->set_abort = 1;
			ocfs2_xa_cleanup_value_truncate(loc, "growing",
							orig_clusters);
			/*
			 * If we were growing an existing value,
			 * ocfs2_xa_cleanup_value_truncate() won't remove
			 * the entry. We need to restore the original value
			 * size.
			 */
			if (loc->xl_entry) {
				BUG_ON(!orig_value_size);
				loc->xl_entry->xe_value_size = orig_value_size;
			}
			mlog_errno(rc);
		}
	}

out:
	return rc;
}

/*
 * Store the value portion of the name+value pair.
This will skip
 * values that are stored externally. Their tree roots were set up
 * by ocfs2_xa_prepare_entry().
 *
 * Values larger than OCFS2_XATTR_INLINE_SIZE are written into the value
 * tree with __ocfs2_xattr_set_value_outside(); smaller ones are copied
 * directly after the padded name.
 *
 * Returns 0, or the error from writing the external value.
 */
static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi,
				struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
	char *nameval_buf;
	struct ocfs2_xattr_value_buf vb;

	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		ocfs2_xa_fill_value_buf(loc, &vb);
		rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
						     ctxt->handle, &vb,
						     xi->xi_value,
						     xi->xi_value_len);
	} else
		memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);

	return rc;
}

/*
 * Set, replace or remove the xattr described by @xi at @loc.
 *
 * A NULL xi->xi_value means "remove loc->xl_entry". Otherwise the
 * entry is prepared and the value stored. Once journal access has been
 * obtained the location is always marked dirty, even on error.
 *
 * Returns 0 on success or a negative error.
 */
static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
			struct ocfs2_xattr_info *xi,
			struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
					      xi->xi_name_len);

	ret = ocfs2_xa_journal_access(ctxt->handle, loc,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * From here on out, everything is going to modify the buffer a
	 * little. Errors are going to leave the xattr header in a
	 * sane state. Thus, even with errors we dirty the sucker.
	 */

	/* Don't worry, we are never called with !xi_value and !xl_entry */
	if (!xi->xi_value) {
		ret = ocfs2_xa_remove(loc, ctxt);
		goto out_dirty;
	}

	ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
	if (ret) {
		/* -ENOSPC is not logged here. */
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto out_dirty;
	}

	ret = ocfs2_xa_store_value(loc, xi, ctxt);
	if (ret)
		mlog_errno(ret);

out_dirty:
	ocfs2_xa_journal_dirty(ctxt->handle, loc);

out:
	return ret;
}

/*
 * Initialize @loc for the inline xattr area of the dinode in @bh. The
 * area is the last i_xattr_inline_size bytes of the buffer. BUGs if
 * the inode does not have OCFS2_INLINE_XATTR_FL set.
 */
static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
				     struct inode *inode,
				     struct buffer_head *bh,
				     struct ocfs2_xattr_entry *entry)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;

	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));

	loc->xl_inode = inode;
	loc->xl_ops = &ocfs2_xa_block_loc_ops;
	loc->xl_storage = bh;
	loc->xl_entry = entry;
	loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
	loc->xl_header =
		(struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
					      loc->xl_size);
}

/*
 * Initialize @loc for the xattr block in @bh. The storage runs from
 * xb_header to the end of the buffer. BUGs if the block is flagged
 * OCFS2_XATTR_INDEXED.
 */
static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
					  struct inode *inode,
					  struct buffer_head *bh,
					  struct ocfs2_xattr_entry *entry)
{
	struct ocfs2_xattr_block *xb =
		(struct ocfs2_xattr_block *)bh->b_data;

	BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);

	loc->xl_inode = inode;
	loc->xl_ops = &ocfs2_xa_block_loc_ops;
	loc->xl_storage = bh;
	loc->xl_header = &(xb->xb_attrs.xb_header);
	loc->xl_entry = entry;
	loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
					     xb_attrs.xb_header);
}

/* Initialize @loc for an xattr bucket, using the bucket operations */
static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
					   struct ocfs2_xattr_bucket *bucket,
					   struct ocfs2_xattr_entry *entry)
{
	loc->xl_inode = bucket->bu_inode;
	loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
	loc->xl_storage = bucket;
	loc->xl_header = bucket_xh(bucket);
	loc->xl_entry = entry;
	loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
}

/*
 * In xattr remove, if it is stored outside and refcounted, we may have
 * the chance to split the refcount tree. So need the allocators.
 *
 * Only the extent at cpos 0 is inspected. If it is not flagged
 * OCFS2_EXT_REFCOUNTED nothing is reserved and *ref_credits stays 0.
 *
 * Returns 0 on success or a negative error.
 */
static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
					struct ocfs2_xattr_value_root *xv,
					struct ocfs2_caching_info *ref_ci,
					struct buffer_head *ref_root_bh,
					struct ocfs2_alloc_context **meta_ac,
					int *ref_credits)
{
	int ret, meta_add = 0;
	u32 p_cluster, num_clusters;
	unsigned int ext_flags;

	*ref_credits = 0;
	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
				       &num_clusters,
				       &xv->xr_list,
				       &ext_flags);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
		goto out;

	ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
						 ref_root_bh, xv,
						 &meta_add, ref_credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
						meta_add, meta_ac);
	if (ret)
		mlog_errno(ret);

out:
	return ret;
}

static int ocfs2_remove_value_outside(struct inode*inode,
				      struct ocfs2_xattr_value_buf *vb,
				      struct ocfs2_xattr_header *header,
				      struct ocfs2_caching_info *ref_ci,
				      struct buffer_head *ref_root_bh)
{
	int ret = 0, i, ref_credits;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
	void *val;

	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);

	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];

		if (ocfs2_xattr_is_local(entry))
			continue;

		val = (void *)header +
le16_to_cpu(entry->xe_name_offset); 2387 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2388 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2389 2390 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2391 ref_ci, ref_root_bh, 2392 &ctxt.meta_ac, 2393 &ref_credits); 2394 2395 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2396 ocfs2_remove_extent_credits(osb->sb)); 2397 if (IS_ERR(ctxt.handle)) { 2398 ret = PTR_ERR(ctxt.handle); 2399 mlog_errno(ret); 2400 break; 2401 } 2402 2403 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2404 2405 ocfs2_commit_trans(osb, ctxt.handle); 2406 if (ctxt.meta_ac) { 2407 ocfs2_free_alloc_context(ctxt.meta_ac); 2408 ctxt.meta_ac = NULL; 2409 } 2410 2411 if (ret < 0) { 2412 mlog_errno(ret); 2413 break; 2414 } 2415 2416 } 2417 2418 if (ctxt.meta_ac) 2419 ocfs2_free_alloc_context(ctxt.meta_ac); 2420 ocfs2_schedule_truncate_log_flush(osb, 1); 2421 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2422 return ret; 2423 } 2424 2425 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2426 struct buffer_head *di_bh, 2427 struct ocfs2_caching_info *ref_ci, 2428 struct buffer_head *ref_root_bh) 2429 { 2430 2431 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2432 struct ocfs2_xattr_header *header; 2433 int ret; 2434 struct ocfs2_xattr_value_buf vb = { 2435 .vb_bh = di_bh, 2436 .vb_access = ocfs2_journal_access_di, 2437 }; 2438 2439 header = (struct ocfs2_xattr_header *) 2440 ((void *)di + inode->i_sb->s_blocksize - 2441 le16_to_cpu(di->i_xattr_inline_size)); 2442 2443 ret = ocfs2_remove_value_outside(inode, &vb, header, 2444 ref_ci, ref_root_bh); 2445 2446 return ret; 2447 } 2448 2449 struct ocfs2_rm_xattr_bucket_para { 2450 struct ocfs2_caching_info *ref_ci; 2451 struct buffer_head *ref_root_bh; 2452 }; 2453 2454 static int ocfs2_xattr_block_remove(struct inode *inode, 2455 struct buffer_head *blk_bh, 2456 struct ocfs2_caching_info *ref_ci, 2457 struct buffer_head *ref_root_bh) 2458 { 2459 struct ocfs2_xattr_block 
*xb; 2460 int ret = 0; 2461 struct ocfs2_xattr_value_buf vb = { 2462 .vb_bh = blk_bh, 2463 .vb_access = ocfs2_journal_access_xb, 2464 }; 2465 struct ocfs2_rm_xattr_bucket_para args = { 2466 .ref_ci = ref_ci, 2467 .ref_root_bh = ref_root_bh, 2468 }; 2469 2470 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2471 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2472 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 2473 ret = ocfs2_remove_value_outside(inode, &vb, header, 2474 ref_ci, ref_root_bh); 2475 } else 2476 ret = ocfs2_iterate_xattr_index_block(inode, 2477 blk_bh, 2478 ocfs2_rm_xattr_cluster, 2479 &args); 2480 2481 return ret; 2482 } 2483 2484 static int ocfs2_xattr_free_block(struct inode *inode, 2485 u64 block, 2486 struct ocfs2_caching_info *ref_ci, 2487 struct buffer_head *ref_root_bh) 2488 { 2489 struct inode *xb_alloc_inode; 2490 struct buffer_head *xb_alloc_bh = NULL; 2491 struct buffer_head *blk_bh = NULL; 2492 struct ocfs2_xattr_block *xb; 2493 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2494 handle_t *handle; 2495 int ret = 0; 2496 u64 blk, bg_blkno; 2497 u16 bit; 2498 2499 ret = ocfs2_read_xattr_block(inode, block, &blk_bh); 2500 if (ret < 0) { 2501 mlog_errno(ret); 2502 goto out; 2503 } 2504 2505 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); 2506 if (ret < 0) { 2507 mlog_errno(ret); 2508 goto out; 2509 } 2510 2511 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2512 blk = le64_to_cpu(xb->xb_blkno); 2513 bit = le16_to_cpu(xb->xb_suballoc_bit); 2514 if (xb->xb_suballoc_loc) 2515 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); 2516 else 2517 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2518 2519 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2520 EXTENT_ALLOC_SYSTEM_INODE, 2521 le16_to_cpu(xb->xb_suballoc_slot)); 2522 if (!xb_alloc_inode) { 2523 ret = -ENOMEM; 2524 mlog_errno(ret); 2525 goto out; 2526 } 2527 inode_lock(xb_alloc_inode); 2528 2529 ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 
1); 2530 if (ret < 0) { 2531 mlog_errno(ret); 2532 goto out_mutex; 2533 } 2534 2535 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 2536 if (IS_ERR(handle)) { 2537 ret = PTR_ERR(handle); 2538 mlog_errno(ret); 2539 goto out_unlock; 2540 } 2541 2542 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, 2543 bit, bg_blkno, 1); 2544 if (ret < 0) 2545 mlog_errno(ret); 2546 2547 ocfs2_commit_trans(osb, handle); 2548 out_unlock: 2549 ocfs2_inode_unlock(xb_alloc_inode, 1); 2550 brelse(xb_alloc_bh); 2551 out_mutex: 2552 inode_unlock(xb_alloc_inode); 2553 iput(xb_alloc_inode); 2554 out: 2555 brelse(blk_bh); 2556 return ret; 2557 } 2558 2559 /* 2560 * ocfs2_xattr_remove() 2561 * 2562 * Free extended attribute resources associated with this inode. 2563 */ 2564 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) 2565 { 2566 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2567 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2568 struct ocfs2_refcount_tree *ref_tree = NULL; 2569 struct buffer_head *ref_root_bh = NULL; 2570 struct ocfs2_caching_info *ref_ci = NULL; 2571 handle_t *handle; 2572 int ret; 2573 2574 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2575 return 0; 2576 2577 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2578 return 0; 2579 2580 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) { 2581 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), 2582 le64_to_cpu(di->i_refcount_loc), 2583 1, &ref_tree, &ref_root_bh); 2584 if (ret) { 2585 mlog_errno(ret); 2586 goto out; 2587 } 2588 ref_ci = &ref_tree->rf_ci; 2589 2590 } 2591 2592 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2593 ret = ocfs2_xattr_ibody_remove(inode, di_bh, 2594 ref_ci, ref_root_bh); 2595 if (ret < 0) { 2596 mlog_errno(ret); 2597 goto out; 2598 } 2599 } 2600 2601 if (di->i_xattr_loc) { 2602 ret = ocfs2_xattr_free_block(inode, 2603 le64_to_cpu(di->i_xattr_loc), 2604 ref_ci, ref_root_bh); 2605 if (ret < 0) { 2606 mlog_errno(ret); 
2607 goto out; 2608 } 2609 } 2610 2611 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 2612 OCFS2_INODE_UPDATE_CREDITS); 2613 if (IS_ERR(handle)) { 2614 ret = PTR_ERR(handle); 2615 mlog_errno(ret); 2616 goto out; 2617 } 2618 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 2619 OCFS2_JOURNAL_ACCESS_WRITE); 2620 if (ret) { 2621 mlog_errno(ret); 2622 goto out_commit; 2623 } 2624 2625 di->i_xattr_loc = 0; 2626 2627 spin_lock(&oi->ip_lock); 2628 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2629 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2630 spin_unlock(&oi->ip_lock); 2631 ocfs2_update_inode_fsync_trans(handle, inode, 0); 2632 2633 ocfs2_journal_dirty(handle, di_bh); 2634 out_commit: 2635 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2636 out: 2637 if (ref_tree) 2638 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); 2639 brelse(ref_root_bh); 2640 return ret; 2641 } 2642 2643 static int ocfs2_xattr_has_space_inline(struct inode *inode, 2644 struct ocfs2_dinode *di) 2645 { 2646 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2647 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; 2648 int free; 2649 2650 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) 2651 return 0; 2652 2653 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2654 struct ocfs2_inline_data *idata = &di->id2.i_data; 2655 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); 2656 } else if (ocfs2_inode_is_fast_symlink(inode)) { 2657 free = ocfs2_fast_symlink_chars(inode->i_sb) - 2658 le64_to_cpu(di->i_size); 2659 } else { 2660 struct ocfs2_extent_list *el = &di->id2.i_list; 2661 free = (le16_to_cpu(el->l_count) - 2662 le16_to_cpu(el->l_next_free_rec)) * 2663 sizeof(struct ocfs2_extent_rec); 2664 } 2665 if (free >= xattrsize) 2666 return 1; 2667 2668 return 0; 2669 } 2670 2671 /* 2672 * ocfs2_xattr_ibody_find() 2673 * 2674 * Find extended attribute in inode block and 2675 * fill search info into struct 
ocfs2_xattr_search.
 */
static int ocfs2_xattr_ibody_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	int ret;
	int has_space = 0;

	/* Nothing is searched or set up when the block size is the minimum. */
	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
		return 0;

	/*
	 * No inline xattr area yet: only continue if one could still be
	 * carved out of the inode body.
	 */
	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
		down_read(&oi->ip_alloc_sem);
		has_space = ocfs2_xattr_has_space_inline(inode, di);
		up_read(&oi->ip_alloc_sem);
		if (!has_space)
			return 0;
	}

	xs->xattr_bh = xs->inode_bh;
	xs->end = (void *)di + inode->i_sb->s_blocksize;
	/*
	 * The header lives at the tail of the block: at the recorded
	 * inline size when the area exists, otherwise where an area of
	 * the superblock's default inline size would start.
	 */
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
		xs->header = (struct ocfs2_xattr_header *)
			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
	else
		xs->header = (struct ocfs2_xattr_header *)
			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
	xs->base = (void *)xs->header;
	xs->here = xs->header->xh_entries;

	/* Find the named attribute. */
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_xattr_find_entry(name_index, name, xs);
		/* -ENODATA is not an error here; it is recorded in xs. */
		if (ret && ret != -ENODATA)
			return ret;
		xs->not_found = ret;
	}

	return 0;
}

/*
 * Create the inline xattr area in the inode block di_bh by shrinking the
 * inline data area or the extent list by s_xattr_inline_size, and set
 * OCFS2_INLINE_XATTR_FL and OCFS2_HAS_XATTR_FL on the inode.
 *
 * Returns 0 on success, -ENOSPC if the inode body has no room, or the
 * error from ocfs2_journal_access_di().
 */
static int ocfs2_xattr_ibody_init(struct inode *inode,
				  struct buffer_head *di_bh,
				  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	unsigned int xattrsize = osb->s_xattr_inline_size;

	if (!ocfs2_xattr_has_space_inline(inode, di)) {
		ret = -ENOSPC;
		goto out;
	}

	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Adjust extent record count or inline data size
	 * to reserve space for extended attribute.
	 */
	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		struct ocfs2_inline_data *idata = &di->id2.i_data;
		le16_add_cpu(&idata->id_count, -xattrsize);
	} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
		struct ocfs2_extent_list *el = &di->id2.i_list;
		le16_add_cpu(&el->l_count, -(xattrsize /
					     sizeof(struct ocfs2_extent_rec)));
	}
	di->i_xattr_inline_size = cpu_to_le16(xattrsize);

	/* Keep the in-memory and on-disk feature flags in step. */
	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);

	ocfs2_journal_dirty(ctxt->handle, di_bh);

out:
	return ret;
}

/*
 * ocfs2_xattr_ibody_set()
 *
 * Set, replace or remove an extended attribute into inode block.
2771 * 2772 */ 2773 static int ocfs2_xattr_ibody_set(struct inode *inode, 2774 struct ocfs2_xattr_info *xi, 2775 struct ocfs2_xattr_search *xs, 2776 struct ocfs2_xattr_set_ctxt *ctxt) 2777 { 2778 int ret; 2779 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2780 struct ocfs2_xa_loc loc; 2781 2782 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2783 return -ENOSPC; 2784 2785 down_write(&oi->ip_alloc_sem); 2786 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2787 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2788 if (ret) { 2789 if (ret != -ENOSPC) 2790 mlog_errno(ret); 2791 goto out; 2792 } 2793 } 2794 2795 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2796 xs->not_found ? NULL : xs->here); 2797 ret = ocfs2_xa_set(&loc, xi, ctxt); 2798 if (ret) { 2799 if (ret != -ENOSPC) 2800 mlog_errno(ret); 2801 goto out; 2802 } 2803 xs->here = loc.xl_entry; 2804 2805 out: 2806 up_write(&oi->ip_alloc_sem); 2807 2808 return ret; 2809 } 2810 2811 /* 2812 * ocfs2_xattr_block_find() 2813 * 2814 * Find extended attribute in external block and 2815 * fill search info into struct ocfs2_xattr_search. 
*/
static int ocfs2_xattr_block_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	int ret = 0;

	/* No external xattr block: leave xs untouched. */
	if (!di->i_xattr_loc)
		return ret;

	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
				     &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	xs->xattr_bh = blk_bh;
	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;

	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		xs->header = &xb->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
		xs->here = xs->header->xh_entries;

		ret = ocfs2_xattr_find_entry(name_index, name, xs);
	} else
		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
						   name_index,
						   name, xs);

	/* A real error: drop the buffer and do not leave it in xs. */
	if (ret && ret != -ENODATA) {
		xs->xattr_bh = NULL;
		goto cleanup;
	}
	/* 0 or -ENODATA; on this path xs keeps the reference to blk_bh. */
	xs->not_found = ret;
	return 0;
cleanup:
	brelse(blk_bh);

	return ret;
}

/*
 * Allocate and initialize a new xattr block for inode and record it in
 * the inode block inode_bh (i_xattr_loc and OCFS2_HAS_XATTR_FL).  When
 * indexed is non-zero the block is set up with an xattr tree root.
 *
 * On success returns 0 and stores the new buffer in *ret_bh; this
 * function does not release it on that path.  On failure returns a
 * negative errno and *ret_bh is left untouched.
 */
static int ocfs2_create_xattr_block(struct inode *inode,
				    struct buffer_head *inode_bh,
				    struct ocfs2_xattr_set_ctxt *ctxt,
				    int indexed,
				    struct buffer_head **ret_bh)
{
	int ret;
	u16 suballoc_bit_start;
	u32 num_got;
	u64 suballoc_loc, first_blkno;
	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
	struct buffer_head *new_bh = NULL;
	struct ocfs2_xattr_block *xblk;

	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
				      inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
				   &suballoc_loc, &suballoc_bit_start,
				   &num_got, &first_blkno);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	new_bh = sb_getblk(inode->i_sb, first_blkno);
	if (!new_bh) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto end;
	}

	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);

	ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
				      new_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	/* Initialize ocfs2_xattr_block */
	xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
	memset(xblk, 0, inode->i_sb->s_blocksize);
	strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
	xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
	xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
	xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
	xblk->xb_fs_generation =
		cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
	xblk->xb_blkno = cpu_to_le64(first_blkno);
	if (indexed) {
		/* Start the tree with one cluster and one used record. */
		struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
		xr->xt_clusters = cpu_to_le32(1);
		xr->xt_last_eb_blk = 0;
		xr->xt_list.l_tree_depth = 0;
		xr->xt_list.l_count = cpu_to_le16(
					ocfs2_xattr_recs_per_xb(inode->i_sb));
		xr->xt_list.l_next_free_rec = cpu_to_le16(1);
		xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
	}
	ocfs2_journal_dirty(ctxt->handle, new_bh);

	/* Add it to the inode */
	di->i_xattr_loc = cpu_to_le64(first_blkno);

	spin_lock(&OCFS2_I(inode)->ip_lock);
	OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
	di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
	spin_unlock(&OCFS2_I(inode)->ip_lock);

	ocfs2_journal_dirty(ctxt->handle, inode_bh);

	/* Hand the buffer to the caller; NULL keeps brelse() below a no-op. */
	*ret_bh = new_bh;
	new_bh = NULL;

end:
	brelse(new_bh);
	return ret;
}

/*
 * ocfs2_xattr_block_set()
 *
 * Set, replace or remove an extended attribute into
external block.
 *
 */
static int ocfs2_xattr_block_set(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs,
				 struct ocfs2_xattr_set_ctxt *ctxt)
{
	struct buffer_head *new_bh = NULL;
	struct ocfs2_xattr_block *xblk = NULL;
	int ret;
	struct ocfs2_xa_loc loc;

	/* No xattr block yet: create a non-indexed one and point xs at it. */
	if (!xs->xattr_bh) {
		ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
					       0, &new_bh);
		if (ret) {
			mlog_errno(ret);
			goto end;
		}

		xs->xattr_bh = new_bh;
		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
		xs->header = &xblk->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
		xs->here = xs->header->xh_entries;
	} else
		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;

	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
					      xs->not_found ? NULL : xs->here);

		ret = ocfs2_xa_set(&loc, xi, ctxt);
		if (!ret)
			xs->here = loc.xl_entry;
		else if ((ret != -ENOSPC) || ctxt->set_abort)
			goto end;
		else {
			/*
			 * The plain block is full and the set was not
			 * aborted: build an index block, then fall through
			 * to the indexed path below.
			 */
			ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
			if (ret)
				goto end;
		}
	}

	/*
	 * The flag is tested again on purpose; presumably
	 * ocfs2_xattr_create_index_block() sets it on this block - verify
	 * against that function.
	 */
	if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
		ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);

end:
	return ret;
}

/* Check whether the new xattr can be inserted into the inode.
*/
static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
				       struct ocfs2_xattr_info *xi,
				       struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_entry *last;
	int free, i;
	size_t min_offs = xs->end - xs->base;

	if (!xs->header)
		return 0;

	last = xs->header->xh_entries;

	/* Find the lowest name+value offset; last ends one past the entries. */
	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
		size_t offs = le16_to_cpu(last->xe_name_offset);
		if (offs < min_offs)
			min_offs = offs;
		last += 1;
	}

	/* Gap between the end of the entry array and the lowest name+value. */
	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
	if (free < 0)
		return 0;

	/* This check is only meaningful when the xattr is not in the inode. */
	BUG_ON(!xs->not_found);

	if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
		return 1;

	return 0;
}

/*
 * Estimate the resources needed to apply xi given the search results for
 * the inode body (xis) and the xattr block (xbs).
 *
 * The estimates are stored through clusters_need, meta_need and
 * credits_need; each of these pointers may be NULL.  Returns 0 on success
 * or a negative errno; the output pointers are written on every path.
 */
static int ocfs2_calc_xattr_set_need(struct inode *inode,
				     struct ocfs2_dinode *di,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xis,
				     struct ocfs2_xattr_search *xbs,
				     int *clusters_need,
				     int *meta_need,
				     int *credits_need)
{
	int ret = 0, old_in_xb = 0;
	int clusters_add = 0, meta_add = 0, credits = 0;
	struct buffer_head *bh = NULL;
	struct ocfs2_xattr_block *xb = NULL;
	struct ocfs2_xattr_entry *xe = NULL;
	struct ocfs2_xattr_value_root *xv = NULL;
	char *base = NULL;
	int name_offset, name_len = 0;
	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
						    xi->xi_value_len);
	u64 value_size;

	/*
	 * Calculate the clusters we need to write.
	 * No matter whether we replace an old one or add a new one,
	 * we need this for writing.
	 */
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		credits += new_clusters *
			   ocfs2_clusters_to_blocks(inode->i_sb, 1);

	/* Brand new xattr: nothing old to account for. */
	if (xis->not_found && xbs->not_found) {
		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);

		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
			clusters_add += new_clusters;
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							&def_xv.xv.xr_list);
		}

		goto meta_guess;
	}

	/* Locate the existing entry, either in the inode or the xattr block. */
	if (!xis->not_found) {
		xe = xis->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		base = xis->base;
		credits += OCFS2_INODE_UPDATE_CREDITS;
	} else {
		int i, block_off = 0;
		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
		xe = xbs->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		i = xbs->here - xbs->header->xh_entries;
		old_in_xb = 1;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			/*
			 * NOTE(review): ret is not checked before block_off
			 * and name_offset are used below - confirm whether a
			 * failure here needs an early exit.
			 */
			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
							bucket_xh(xbs->bucket),
							i, &block_off,
							&name_offset);
			base = bucket_block(xbs->bucket, block_off);
			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		} else {
			base = xbs->base;
			credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
		}
	}

	/*
	 * delete a xattr doesn't need metadata and cluster allocation.
	 * so just calculate the credits and return.
	 *
	 * The credits for removing the value tree will be extended
	 * by ocfs2_remove_extent itself.
	 */
	if (!xi->xi_value) {
		if (!ocfs2_xattr_is_local(xe))
			credits += ocfs2_remove_extent_credits(inode->i_sb);

		goto out;
	}

	/* do cluster allocation guess first. */
	value_size = le64_to_cpu(xe->xe_value_size);

	if (old_in_xb) {
		/*
		 * In xattr set, we always try to set the xe in inode first,
		 * so if it can be inserted into inode successfully, the old
		 * one will be removed from the xattr block, and this xattr
		 * will be inserted into inode as a new xattr in inode.
		 */
		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
			clusters_add += new_clusters;
			credits += ocfs2_remove_extent_credits(inode->i_sb) +
				    OCFS2_INODE_UPDATE_CREDITS;
			if (!ocfs2_xattr_is_local(xe))
				credits += ocfs2_calc_extend_credits(
							inode->i_sb,
							&def_xv.xv.xr_list);
			goto out;
		}
	}

	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		/* the new values will be stored outside. */
		u32 old_clusters = 0;

		if (!ocfs2_xattr_is_local(xe)) {
			old_clusters =	ocfs2_clusters_for_bytes(inode->i_sb,
								 value_size);
			xv = (struct ocfs2_xattr_value_root *)
			     (base + name_offset + name_len);
			value_size = OCFS2_XATTR_ROOT_SIZE;
		} else
			xv = &def_xv.xv;

		if (old_clusters >= new_clusters) {
			/* Shrinking or same size: only removal credits. */
			credits += ocfs2_remove_extent_credits(inode->i_sb);
			goto out;
		} else {
			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
			clusters_add += new_clusters - old_clusters;
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     &xv->xr_list);
			if (value_size >= OCFS2_XATTR_ROOT_SIZE)
				goto out;
		}
	} else {
		/*
		 * Now the new value will be stored inside. So if the new
		 * value is smaller than the size of value root or the old
		 * value, we don't need any allocation, otherwise we have
		 * to guess metadata allocation.
		 */
		if ((ocfs2_xattr_is_local(xe) &&
		     (value_size >= xi->xi_value_len)) ||
		    (!ocfs2_xattr_is_local(xe) &&
		     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
			goto out;
	}

meta_guess:
	/* calculate metadata allocation. */
	if (di->i_xattr_loc) {
		/* Use the already-read xattr block if the caller has one. */
		if (!xbs->xattr_bh) {
			ret = ocfs2_read_xattr_block(inode,
						     le64_to_cpu(di->i_xattr_loc),
						     &bh);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			xb = (struct ocfs2_xattr_block *)bh->b_data;
		} else
			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;

		/*
		 * If there is already an xattr tree, good, we can calculate
		 * like other b-trees. Otherwise we may have the chance of
		 * create a tree, the credit calculation is borrowed from
		 * ocfs2_calc_extend_credits with root_el = NULL. And the
		 * new tree will be cluster based, so no meta is needed.
		 */
		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			struct ocfs2_extent_list *el =
				 &xb->xb_attrs.xb_root.xt_list;
			meta_add += ocfs2_extend_meta_needed(el);
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     el);
		} else
			credits += OCFS2_SUBALLOC_ALLOC + 1;

		/*
		 * This cluster will be used either for new bucket or for
		 * new xattr block.
		 * If the cluster size is the same as the bucket size, one
		 * more is needed since we may need to extend the bucket
		 * also.
		 */
		clusters_add += 1;
		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		if (OCFS2_XATTR_BUCKET_SIZE ==
			OCFS2_SB(inode->i_sb)->s_clustersize) {
			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
			clusters_add += 1;
		}
	} else {
		/* No xattr block exists yet; budget for creating one. */
		credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
			struct ocfs2_extent_list *el = &def_xv.xv.xr_list;
			meta_add += ocfs2_extend_meta_needed(el);
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     el);
		} else {
			meta_add += 1;
		}
	}
out:
	if (clusters_need)
		*clusters_need = clusters_add;
	if (meta_need)
		*meta_need = meta_add;
	if (credits_need)
		*credits_need = credits;
	brelse(bh);
	return ret;
}

/*
 * Zero ctxt, compute what setting xi needs, and reserve the metadata
 * blocks (plus extra_meta) and clusters in ctxt->meta_ac / ctxt->data_ac.
 * The transaction credits needed are returned through credits.
 *
 * Returns 0 on success or a negative errno; on failure any metadata
 * reservation made here is released.
 */
static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
				     struct ocfs2_dinode *di,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xis,
				     struct ocfs2_xattr_search *xbs,
				     struct ocfs2_xattr_set_ctxt *ctxt,
				     int extra_meta,
				     int *credits)
{
	int clusters_add, meta_add, ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));

	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);

	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
					&clusters_add, &meta_add, credits);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	meta_add += extra_meta;
	trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add,
					clusters_add, *credits);

	if (meta_add) {
		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
							&ctxt->meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	if (clusters_add) {
		ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
		if (ret)
			mlog_errno(ret);
	}
out:
	if (ret) {
		if (ctxt->meta_ac) {
			ocfs2_free_alloc_context(ctxt->meta_ac);
			ctxt->meta_ac = NULL;
		}

		/*
		 * We cannot have an error and a non null ctxt->data_ac.
		 */
	}

	return ret;
}

/*
 * Apply xi inside the transaction ctxt->handle.  Removal (xi->xi_value
 * NULL) goes to wherever the xattr was found.  A set tries the inode body
 * first and falls back to the xattr block on -ENOSPC, removing the stale
 * copy from the other location afterwards.
 *
 * Note that xi->xi_value and xi->xi_value_len are cleared when a stale
 * copy has to be removed.  On success the inode ctime is updated.
 * Returns 0 on success or a negative errno.
 */
static int __ocfs2_xattr_set_handle(struct inode *inode,
				    struct ocfs2_dinode *di,
				    struct ocfs2_xattr_info *xi,
				    struct ocfs2_xattr_search *xis,
				    struct ocfs2_xattr_search *xbs,
				    struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret = 0, credits, old_found;

	if (!xi->xi_value) {
		/* Remove existing extended attribute */
		if (!xis->not_found)
			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		else if (!xbs->not_found)
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
	} else {
		/* We always try to set the extended attribute in the inode first. */
		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		if (!ret && !xbs->not_found) {
			/*
			 * If that succeeded and the extended attribute also
			 * exists in the external block, remove it from there.
			 */
			xi->xi_value = NULL;
			xi->xi_value_len = 0;

			/*
			 * Temporarily mark the inode copy as not found so the
			 * credit calculation looks at the block copy.
			 */
			old_found = xis->not_found;
			xis->not_found = -ENODATA;
			ret = ocfs2_calc_xattr_set_need(inode,
							di,
							xi,
							xis,
							xbs,
							NULL,
							NULL,
							&credits);
			xis->not_found = old_found;
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_extend_trans(ctxt->handle, credits);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
		} else if ((ret == -ENOSPC) && !ctxt->set_abort) {
			/* The xattr block exists but has not been read yet. */
			if (di->i_xattr_loc && !xbs->xattr_bh) {
				ret = ocfs2_xattr_block_find(inode,
							     xi->xi_name_index,
							     xi->xi_name, xbs);
				if (ret)
					goto out;

				old_found = xis->not_found;
				xis->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				xis->not_found = old_found;
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
			}
			/*
			 * If no space in inode, we will set extended attribute
			 * into external block.
			 */
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
			if (ret)
				goto out;
			if (!xis->not_found) {
				/*
				 * If that succeeded and the extended attribute
				 * still exists in the inode, remove it there.
				 */
				xi->xi_value = NULL;
				xi->xi_value_len = 0;
				xbs->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
				ret = ocfs2_xattr_ibody_set(inode, xi,
							    xis, ctxt);
			}
		}
	}

	if (!ret) {
		/* Update inode ctime. */
		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
					      xis->inode_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		inode->i_ctime = current_time(inode);
		di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
		di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
	}
out:
	return ret;
}

/*
 * This function is only called while creating an inode,
 * to initialize the security/ACL xattrs of the new inode.
 * All transaction credits have been reserved in mknod.
 *
 * The caller supplies the running transaction (handle) and the
 * allocation contexts (meta_ac, data_ac).  Returns 0 on success,
 * -EOPNOTSUPP if the filesystem does not support xattrs, -ENOMEM if a
 * bucket cannot be allocated, or another negative errno.
 * NOTE(review): the flags parameter is not used in this function.
 */
int ocfs2_xattr_set_handle(handle_t *handle,
			   struct inode *inode,
			   struct buffer_head *di_bh,
			   int name_index,
			   const char *name,
			   const void *value,
			   size_t value_len,
			   int flags,
			   struct ocfs2_alloc_context *meta_ac,
			   struct ocfs2_alloc_context *data_ac)
{
	struct ocfs2_dinode *di;
	int ret;

	struct ocfs2_xattr_info xi = {
		.xi_name_index = name_index,
		.xi_name = name,
		.xi_name_len = strlen(name),
		.xi_value = value,
		.xi_value_len = value_len,
	};

	/* Both searches start out as "not found". */
	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_set_ctxt ctxt = {
		.handle = handle,
		.meta_ac = meta_ac,
		.data_ac = data_ac,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	/*
	 * In extreme situation, may need xattr bucket when
	 * block size is too small. And we have already reserved
	 * the credits for bucket in mknod.
	 */
	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
		xbs.bucket = ocfs2_xattr_bucket_new(inode);
		if (!xbs.bucket) {
			mlog_errno(-ENOMEM);
			return -ENOMEM;
		}
	}

	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_write(&OCFS2_I(inode)->ip_xattr_sem);

	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
	if (ret)
		goto cleanup;
	/* Only look in the xattr block if the inode body did not have it. */
	if (xis.not_found) {
		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
		if (ret)
			goto cleanup;
	}

	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);

cleanup:
	up_write(&OCFS2_I(inode)->ip_xattr_sem);
	brelse(xbs.xattr_bh);
	ocfs2_xattr_bucket_free(xbs.bucket);

	return ret;
}

/*
 * ocfs2_xattr_set()
 *
 * Set, replace or remove an extended attribute for this inode.
 * value is NULL to remove an existing extended attribute, else either
 * create or replace an extended attribute.
3530 */ 3531 int ocfs2_xattr_set(struct inode *inode, 3532 int name_index, 3533 const char *name, 3534 const void *value, 3535 size_t value_len, 3536 int flags) 3537 { 3538 struct buffer_head *di_bh = NULL; 3539 struct ocfs2_dinode *di; 3540 int ret, credits, ref_meta = 0, ref_credits = 0; 3541 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3542 struct inode *tl_inode = osb->osb_tl_inode; 3543 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; 3544 struct ocfs2_refcount_tree *ref_tree = NULL; 3545 3546 struct ocfs2_xattr_info xi = { 3547 .xi_name_index = name_index, 3548 .xi_name = name, 3549 .xi_name_len = strlen(name), 3550 .xi_value = value, 3551 .xi_value_len = value_len, 3552 }; 3553 3554 struct ocfs2_xattr_search xis = { 3555 .not_found = -ENODATA, 3556 }; 3557 3558 struct ocfs2_xattr_search xbs = { 3559 .not_found = -ENODATA, 3560 }; 3561 3562 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3563 return -EOPNOTSUPP; 3564 3565 /* 3566 * Only xbs will be used on indexed trees. xis doesn't need a 3567 * bucket. 3568 */ 3569 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3570 if (!xbs.bucket) { 3571 mlog_errno(-ENOMEM); 3572 return -ENOMEM; 3573 } 3574 3575 ret = ocfs2_inode_lock(inode, &di_bh, 1); 3576 if (ret < 0) { 3577 mlog_errno(ret); 3578 goto cleanup_nolock; 3579 } 3580 xis.inode_bh = xbs.inode_bh = di_bh; 3581 di = (struct ocfs2_dinode *)di_bh->b_data; 3582 3583 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3584 /* 3585 * Scan inode and external block to find the same name 3586 * extended attribute and collect search information. 
3587 */ 3588 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3589 if (ret) 3590 goto cleanup; 3591 if (xis.not_found) { 3592 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3593 if (ret) 3594 goto cleanup; 3595 } 3596 3597 if (xis.not_found && xbs.not_found) { 3598 ret = -ENODATA; 3599 if (flags & XATTR_REPLACE) 3600 goto cleanup; 3601 ret = 0; 3602 if (!value) 3603 goto cleanup; 3604 } else { 3605 ret = -EEXIST; 3606 if (flags & XATTR_CREATE) 3607 goto cleanup; 3608 } 3609 3610 /* Check whether the value is refcounted and do some preparation. */ 3611 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL && 3612 (!xis.not_found || !xbs.not_found)) { 3613 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3614 &xis, &xbs, &ref_tree, 3615 &ref_meta, &ref_credits); 3616 if (ret) { 3617 mlog_errno(ret); 3618 goto cleanup; 3619 } 3620 } 3621 3622 inode_lock(tl_inode); 3623 3624 if (ocfs2_truncate_log_needs_flush(osb)) { 3625 ret = __ocfs2_flush_truncate_log(osb); 3626 if (ret < 0) { 3627 inode_unlock(tl_inode); 3628 mlog_errno(ret); 3629 goto cleanup; 3630 } 3631 } 3632 inode_unlock(tl_inode); 3633 3634 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3635 &xbs, &ctxt, ref_meta, &credits); 3636 if (ret) { 3637 mlog_errno(ret); 3638 goto cleanup; 3639 } 3640 3641 /* we need to update inode's ctime field, so add credit for it. 
*/ 3642 credits += OCFS2_INODE_UPDATE_CREDITS; 3643 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 3644 if (IS_ERR(ctxt.handle)) { 3645 ret = PTR_ERR(ctxt.handle); 3646 mlog_errno(ret); 3647 goto out_free_ac; 3648 } 3649 3650 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3651 ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0); 3652 3653 ocfs2_commit_trans(osb, ctxt.handle); 3654 3655 out_free_ac: 3656 if (ctxt.data_ac) 3657 ocfs2_free_alloc_context(ctxt.data_ac); 3658 if (ctxt.meta_ac) 3659 ocfs2_free_alloc_context(ctxt.meta_ac); 3660 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3661 ocfs2_schedule_truncate_log_flush(osb, 1); 3662 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3663 3664 cleanup: 3665 if (ref_tree) 3666 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3667 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3668 if (!value && !ret) { 3669 ret = ocfs2_try_remove_refcount_tree(inode, di_bh); 3670 if (ret) 3671 mlog_errno(ret); 3672 } 3673 ocfs2_inode_unlock(inode, 1); 3674 cleanup_nolock: 3675 brelse(di_bh); 3676 brelse(xbs.xattr_bh); 3677 ocfs2_xattr_bucket_free(xbs.bucket); 3678 3679 return ret; 3680 } 3681 3682 /* 3683 * Find the xattr extent rec which may contains name_hash. 3684 * e_cpos will be the first name hash of the xattr rec. 3685 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. 
3686 */ 3687 static int ocfs2_xattr_get_rec(struct inode *inode, 3688 u32 name_hash, 3689 u64 *p_blkno, 3690 u32 *e_cpos, 3691 u32 *num_clusters, 3692 struct ocfs2_extent_list *el) 3693 { 3694 int ret = 0, i; 3695 struct buffer_head *eb_bh = NULL; 3696 struct ocfs2_extent_block *eb; 3697 struct ocfs2_extent_rec *rec = NULL; 3698 u64 e_blkno = 0; 3699 3700 if (el->l_tree_depth) { 3701 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3702 &eb_bh); 3703 if (ret) { 3704 mlog_errno(ret); 3705 goto out; 3706 } 3707 3708 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3709 el = &eb->h_list; 3710 3711 if (el->l_tree_depth) { 3712 ret = ocfs2_error(inode->i_sb, 3713 "Inode %lu has non zero tree depth in xattr tree block %llu\n", 3714 inode->i_ino, 3715 (unsigned long long)eb_bh->b_blocknr); 3716 goto out; 3717 } 3718 } 3719 3720 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3721 rec = &el->l_recs[i]; 3722 3723 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3724 e_blkno = le64_to_cpu(rec->e_blkno); 3725 break; 3726 } 3727 } 3728 3729 if (!e_blkno) { 3730 ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n", 3731 inode->i_ino, 3732 le32_to_cpu(rec->e_cpos), 3733 ocfs2_rec_clusters(el, rec)); 3734 goto out; 3735 } 3736 3737 *p_blkno = le64_to_cpu(rec->e_blkno); 3738 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3739 if (e_cpos) 3740 *e_cpos = le32_to_cpu(rec->e_cpos); 3741 out: 3742 brelse(eb_bh); 3743 return ret; 3744 } 3745 3746 typedef int (xattr_bucket_func)(struct inode *inode, 3747 struct ocfs2_xattr_bucket *bucket, 3748 void *para); 3749 3750 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3751 struct ocfs2_xattr_bucket *bucket, 3752 int name_index, 3753 const char *name, 3754 u32 name_hash, 3755 u16 *xe_index, 3756 int *found) 3757 { 3758 int i, ret = 0, cmp = 1, block_off, new_offset; 3759 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3760 size_t name_len = strlen(name); 3761 struct 
ocfs2_xattr_entry *xe = NULL; 3762 char *xe_name; 3763 3764 /* 3765 * We don't use binary search in the bucket because there 3766 * may be multiple entries with the same name hash. 3767 */ 3768 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3769 xe = &xh->xh_entries[i]; 3770 3771 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3772 continue; 3773 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3774 break; 3775 3776 cmp = name_index - ocfs2_xattr_get_type(xe); 3777 if (!cmp) 3778 cmp = name_len - xe->xe_name_len; 3779 if (cmp) 3780 continue; 3781 3782 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3783 xh, 3784 i, 3785 &block_off, 3786 &new_offset); 3787 if (ret) { 3788 mlog_errno(ret); 3789 break; 3790 } 3791 3792 3793 xe_name = bucket_block(bucket, block_off) + new_offset; 3794 if (!memcmp(name, xe_name, name_len)) { 3795 *xe_index = i; 3796 *found = 1; 3797 ret = 0; 3798 break; 3799 } 3800 } 3801 3802 return ret; 3803 } 3804 3805 /* 3806 * Find the specified xattr entry in a series of buckets. 3807 * This series start from p_blkno and last for num_clusters. 3808 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3809 * the num of the valid buckets. 3810 * 3811 * Return the buffer_head this xattr should reside in. And if the xattr's 3812 * hash is in the gap of 2 buckets, return the lower bucket. 
3813 */ 3814 static int ocfs2_xattr_bucket_find(struct inode *inode, 3815 int name_index, 3816 const char *name, 3817 u32 name_hash, 3818 u64 p_blkno, 3819 u32 first_hash, 3820 u32 num_clusters, 3821 struct ocfs2_xattr_search *xs) 3822 { 3823 int ret, found = 0; 3824 struct ocfs2_xattr_header *xh = NULL; 3825 struct ocfs2_xattr_entry *xe = NULL; 3826 u16 index = 0; 3827 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3828 int low_bucket = 0, bucket, high_bucket; 3829 struct ocfs2_xattr_bucket *search; 3830 u32 last_hash; 3831 u64 blkno, lower_blkno = 0; 3832 3833 search = ocfs2_xattr_bucket_new(inode); 3834 if (!search) { 3835 ret = -ENOMEM; 3836 mlog_errno(ret); 3837 goto out; 3838 } 3839 3840 ret = ocfs2_read_xattr_bucket(search, p_blkno); 3841 if (ret) { 3842 mlog_errno(ret); 3843 goto out; 3844 } 3845 3846 xh = bucket_xh(search); 3847 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 3848 while (low_bucket <= high_bucket) { 3849 ocfs2_xattr_bucket_relse(search); 3850 3851 bucket = (low_bucket + high_bucket) / 2; 3852 blkno = p_blkno + bucket * blk_per_bucket; 3853 ret = ocfs2_read_xattr_bucket(search, blkno); 3854 if (ret) { 3855 mlog_errno(ret); 3856 goto out; 3857 } 3858 3859 xh = bucket_xh(search); 3860 xe = &xh->xh_entries[0]; 3861 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3862 high_bucket = bucket - 1; 3863 continue; 3864 } 3865 3866 /* 3867 * Check whether the hash of the last entry in our 3868 * bucket is larger than the search one. for an empty 3869 * bucket, the last one is also the first one. 3870 */ 3871 if (xh->xh_count) 3872 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; 3873 3874 last_hash = le32_to_cpu(xe->xe_name_hash); 3875 3876 /* record lower_blkno which may be the insert place. */ 3877 lower_blkno = blkno; 3878 3879 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3880 low_bucket = bucket + 1; 3881 continue; 3882 } 3883 3884 /* the searched xattr should reside in this bucket if exists. 
*/ 3885 ret = ocfs2_find_xe_in_bucket(inode, search, 3886 name_index, name, name_hash, 3887 &index, &found); 3888 if (ret) { 3889 mlog_errno(ret); 3890 goto out; 3891 } 3892 break; 3893 } 3894 3895 /* 3896 * Record the bucket we have found. 3897 * When the xattr's hash value is in the gap of 2 buckets, we will 3898 * always set it to the previous bucket. 3899 */ 3900 if (!lower_blkno) 3901 lower_blkno = p_blkno; 3902 3903 /* This should be in cache - we just read it during the search */ 3904 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); 3905 if (ret) { 3906 mlog_errno(ret); 3907 goto out; 3908 } 3909 3910 xs->header = bucket_xh(xs->bucket); 3911 xs->base = bucket_block(xs->bucket, 0); 3912 xs->end = xs->base + inode->i_sb->s_blocksize; 3913 3914 if (found) { 3915 xs->here = &xs->header->xh_entries[index]; 3916 trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno, 3917 name, name_index, name_hash, 3918 (unsigned long long)bucket_blkno(xs->bucket), 3919 index); 3920 } else 3921 ret = -ENODATA; 3922 3923 out: 3924 ocfs2_xattr_bucket_free(search); 3925 return ret; 3926 } 3927 3928 static int ocfs2_xattr_index_block_find(struct inode *inode, 3929 struct buffer_head *root_bh, 3930 int name_index, 3931 const char *name, 3932 struct ocfs2_xattr_search *xs) 3933 { 3934 int ret; 3935 struct ocfs2_xattr_block *xb = 3936 (struct ocfs2_xattr_block *)root_bh->b_data; 3937 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 3938 struct ocfs2_extent_list *el = &xb_root->xt_list; 3939 u64 p_blkno = 0; 3940 u32 first_hash, num_clusters = 0; 3941 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 3942 3943 if (le16_to_cpu(el->l_next_free_rec) == 0) 3944 return -ENODATA; 3945 3946 trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno, 3947 name, name_index, name_hash, 3948 (unsigned long long)root_bh->b_blocknr, 3949 -1); 3950 3951 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3952 &num_clusters, el); 3953 if (ret) { 
3954 mlog_errno(ret); 3955 goto out; 3956 } 3957 3958 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 3959 3960 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno, 3961 name, name_index, first_hash, 3962 (unsigned long long)p_blkno, 3963 num_clusters); 3964 3965 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3966 p_blkno, first_hash, num_clusters, xs); 3967 3968 out: 3969 return ret; 3970 } 3971 3972 static int ocfs2_iterate_xattr_buckets(struct inode *inode, 3973 u64 blkno, 3974 u32 clusters, 3975 xattr_bucket_func *func, 3976 void *para) 3977 { 3978 int i, ret = 0; 3979 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3980 u32 num_buckets = clusters * bpc; 3981 struct ocfs2_xattr_bucket *bucket; 3982 3983 bucket = ocfs2_xattr_bucket_new(inode); 3984 if (!bucket) { 3985 mlog_errno(-ENOMEM); 3986 return -ENOMEM; 3987 } 3988 3989 trace_ocfs2_iterate_xattr_buckets( 3990 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3991 (unsigned long long)blkno, clusters); 3992 3993 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 3994 ret = ocfs2_read_xattr_bucket(bucket, blkno); 3995 if (ret) { 3996 mlog_errno(ret); 3997 break; 3998 } 3999 4000 /* 4001 * The real bucket num in this series of blocks is stored 4002 * in the 1st bucket. 
4003 */ 4004 if (i == 0) 4005 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 4006 4007 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno, 4008 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 4009 if (func) { 4010 ret = func(inode, bucket, para); 4011 if (ret && ret != -ERANGE) 4012 mlog_errno(ret); 4013 /* Fall through to bucket_relse() */ 4014 } 4015 4016 ocfs2_xattr_bucket_relse(bucket); 4017 if (ret) 4018 break; 4019 } 4020 4021 ocfs2_xattr_bucket_free(bucket); 4022 return ret; 4023 } 4024 4025 struct ocfs2_xattr_tree_list { 4026 char *buffer; 4027 size_t buffer_size; 4028 size_t result; 4029 }; 4030 4031 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 4032 struct ocfs2_xattr_header *xh, 4033 int index, 4034 int *block_off, 4035 int *new_offset) 4036 { 4037 u16 name_offset; 4038 4039 if (index < 0 || index >= le16_to_cpu(xh->xh_count)) 4040 return -EINVAL; 4041 4042 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 4043 4044 *block_off = name_offset >> sb->s_blocksize_bits; 4045 *new_offset = name_offset % sb->s_blocksize; 4046 4047 return 0; 4048 } 4049 4050 static int ocfs2_list_xattr_bucket(struct inode *inode, 4051 struct ocfs2_xattr_bucket *bucket, 4052 void *para) 4053 { 4054 int ret = 0, type; 4055 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; 4056 int i, block_off, new_offset; 4057 const char *name; 4058 4059 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { 4060 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; 4061 type = ocfs2_xattr_get_type(entry); 4062 4063 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 4064 bucket_xh(bucket), 4065 i, 4066 &block_off, 4067 &new_offset); 4068 if (ret) 4069 break; 4070 4071 name = (const char *)bucket_block(bucket, block_off) + 4072 new_offset; 4073 ret = ocfs2_xattr_list_entry(inode->i_sb, 4074 xl->buffer, 4075 xl->buffer_size, 4076 &xl->result, 4077 type, name, 4078 
entry->xe_name_len); 4079 if (ret) 4080 break; 4081 } 4082 4083 return ret; 4084 } 4085 4086 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 4087 struct buffer_head *blk_bh, 4088 xattr_tree_rec_func *rec_func, 4089 void *para) 4090 { 4091 struct ocfs2_xattr_block *xb = 4092 (struct ocfs2_xattr_block *)blk_bh->b_data; 4093 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; 4094 int ret = 0; 4095 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 4096 u64 p_blkno = 0; 4097 4098 if (!el->l_next_free_rec || !rec_func) 4099 return 0; 4100 4101 while (name_hash > 0) { 4102 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 4103 &e_cpos, &num_clusters, el); 4104 if (ret) { 4105 mlog_errno(ret); 4106 break; 4107 } 4108 4109 ret = rec_func(inode, blk_bh, p_blkno, e_cpos, 4110 num_clusters, para); 4111 if (ret) { 4112 if (ret != -ERANGE) 4113 mlog_errno(ret); 4114 break; 4115 } 4116 4117 if (e_cpos == 0) 4118 break; 4119 4120 name_hash = e_cpos - 1; 4121 } 4122 4123 return ret; 4124 4125 } 4126 4127 static int ocfs2_list_xattr_tree_rec(struct inode *inode, 4128 struct buffer_head *root_bh, 4129 u64 blkno, u32 cpos, u32 len, void *para) 4130 { 4131 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 4132 ocfs2_list_xattr_bucket, para); 4133 } 4134 4135 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 4136 struct buffer_head *blk_bh, 4137 char *buffer, 4138 size_t buffer_size) 4139 { 4140 int ret; 4141 struct ocfs2_xattr_tree_list xl = { 4142 .buffer = buffer, 4143 .buffer_size = buffer_size, 4144 .result = 0, 4145 }; 4146 4147 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 4148 ocfs2_list_xattr_tree_rec, &xl); 4149 if (ret) { 4150 mlog_errno(ret); 4151 goto out; 4152 } 4153 4154 ret = xl.result; 4155 out: 4156 return ret; 4157 } 4158 4159 static int cmp_xe(const void *a, const void *b) 4160 { 4161 const struct ocfs2_xattr_entry *l = a, *r = b; 4162 u32 l_hash = le32_to_cpu(l->xe_name_hash); 4163 u32 r_hash = 
le32_to_cpu(r->xe_name_hash); 4164 4165 if (l_hash > r_hash) 4166 return 1; 4167 if (l_hash < r_hash) 4168 return -1; 4169 return 0; 4170 } 4171 4172 static void swap_xe(void *a, void *b, int size) 4173 { 4174 struct ocfs2_xattr_entry *l = a, *r = b, tmp; 4175 4176 tmp = *l; 4177 memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); 4178 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); 4179 } 4180 4181 /* 4182 * When the ocfs2_xattr_block is filled up, new bucket will be created 4183 * and all the xattr entries will be moved to the new bucket. 4184 * The header goes at the start of the bucket, and the names+values are 4185 * filled from the end. This is why *target starts as the last buffer. 4186 * Note: we need to sort the entries since they are not saved in order 4187 * in the ocfs2_xattr_block. 4188 */ 4189 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 4190 struct buffer_head *xb_bh, 4191 struct ocfs2_xattr_bucket *bucket) 4192 { 4193 int i, blocksize = inode->i_sb->s_blocksize; 4194 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4195 u16 offset, size, off_change; 4196 struct ocfs2_xattr_entry *xe; 4197 struct ocfs2_xattr_block *xb = 4198 (struct ocfs2_xattr_block *)xb_bh->b_data; 4199 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 4200 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 4201 u16 count = le16_to_cpu(xb_xh->xh_count); 4202 char *src = xb_bh->b_data; 4203 char *target = bucket_block(bucket, blks - 1); 4204 4205 trace_ocfs2_cp_xattr_block_to_bucket_begin( 4206 (unsigned long long)xb_bh->b_blocknr, 4207 (unsigned long long)bucket_blkno(bucket)); 4208 4209 for (i = 0; i < blks; i++) 4210 memset(bucket_block(bucket, i), 0, blocksize); 4211 4212 /* 4213 * Since the xe_name_offset is based on ocfs2_xattr_header, 4214 * there is a offset change corresponding to the change of 4215 * ocfs2_xattr_header's position. 
4216 */ 4217 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4218 xe = &xb_xh->xh_entries[count - 1]; 4219 offset = le16_to_cpu(xe->xe_name_offset) + off_change; 4220 size = blocksize - offset; 4221 4222 /* copy all the names and values. */ 4223 memcpy(target + offset, src + offset, size); 4224 4225 /* Init new header now. */ 4226 xh->xh_count = xb_xh->xh_count; 4227 xh->xh_num_buckets = cpu_to_le16(1); 4228 xh->xh_name_value_len = cpu_to_le16(size); 4229 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 4230 4231 /* copy all the entries. */ 4232 target = bucket_block(bucket, 0); 4233 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 4234 size = count * sizeof(struct ocfs2_xattr_entry); 4235 memcpy(target + offset, (char *)xb_xh + offset, size); 4236 4237 /* Change the xe offset for all the xe because of the move. */ 4238 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + 4239 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4240 for (i = 0; i < count; i++) 4241 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4242 4243 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); 4244 4245 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4246 cmp_xe, swap_xe); 4247 } 4248 4249 /* 4250 * After we move xattr from block to index btree, we have to 4251 * update ocfs2_xattr_search to the new xe and base. 4252 * 4253 * When the entry is in xattr block, xattr_bh indicates the storage place. 4254 * While if the entry is in index b-tree, "bucket" indicates the 4255 * real place of the xattr. 
4256 */ 4257 static void ocfs2_xattr_update_xattr_search(struct inode *inode, 4258 struct ocfs2_xattr_search *xs, 4259 struct buffer_head *old_bh) 4260 { 4261 char *buf = old_bh->b_data; 4262 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 4263 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 4264 int i; 4265 4266 xs->header = bucket_xh(xs->bucket); 4267 xs->base = bucket_block(xs->bucket, 0); 4268 xs->end = xs->base + inode->i_sb->s_blocksize; 4269 4270 if (xs->not_found) 4271 return; 4272 4273 i = xs->here - old_xh->xh_entries; 4274 xs->here = &xs->header->xh_entries[i]; 4275 } 4276 4277 static int ocfs2_xattr_create_index_block(struct inode *inode, 4278 struct ocfs2_xattr_search *xs, 4279 struct ocfs2_xattr_set_ctxt *ctxt) 4280 { 4281 int ret; 4282 u32 bit_off, len; 4283 u64 blkno; 4284 handle_t *handle = ctxt->handle; 4285 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4286 struct buffer_head *xb_bh = xs->xattr_bh; 4287 struct ocfs2_xattr_block *xb = 4288 (struct ocfs2_xattr_block *)xb_bh->b_data; 4289 struct ocfs2_xattr_tree_root *xr; 4290 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4291 4292 trace_ocfs2_xattr_create_index_block_begin( 4293 (unsigned long long)xb_bh->b_blocknr); 4294 4295 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4296 BUG_ON(!xs->bucket); 4297 4298 /* 4299 * XXX: 4300 * We can use this lock for now, and maybe move to a dedicated mutex 4301 * if performance becomes a problem later. 
4302 */ 4303 down_write(&oi->ip_alloc_sem); 4304 4305 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, 4306 OCFS2_JOURNAL_ACCESS_WRITE); 4307 if (ret) { 4308 mlog_errno(ret); 4309 goto out; 4310 } 4311 4312 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 4313 1, 1, &bit_off, &len); 4314 if (ret) { 4315 mlog_errno(ret); 4316 goto out; 4317 } 4318 4319 /* 4320 * The bucket may spread in many blocks, and 4321 * we will only touch the 1st block and the last block 4322 * in the whole bucket(one for entry and one for data). 4323 */ 4324 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4325 4326 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); 4327 4328 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1); 4329 if (ret) { 4330 mlog_errno(ret); 4331 goto out; 4332 } 4333 4334 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 4335 OCFS2_JOURNAL_ACCESS_CREATE); 4336 if (ret) { 4337 mlog_errno(ret); 4338 goto out; 4339 } 4340 4341 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); 4342 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 4343 4344 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); 4345 4346 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 4347 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 4348 offsetof(struct ocfs2_xattr_block, xb_attrs)); 4349 4350 xr = &xb->xb_attrs.xb_root; 4351 xr->xt_clusters = cpu_to_le32(1); 4352 xr->xt_last_eb_blk = 0; 4353 xr->xt_list.l_tree_depth = 0; 4354 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); 4355 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 4356 4357 xr->xt_list.l_recs[0].e_cpos = 0; 4358 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 4359 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 4360 4361 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 4362 4363 ocfs2_journal_dirty(handle, xb_bh); 4364 4365 out: 4366 up_write(&oi->ip_alloc_sem); 4367 4368 return ret; 4369 } 4370 4371 static int 
cmp_xe_offset(const void *a, const void *b) 4372 { 4373 const struct ocfs2_xattr_entry *l = a, *r = b; 4374 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4375 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4376 4377 if (l_name_offset < r_name_offset) 4378 return 1; 4379 if (l_name_offset > r_name_offset) 4380 return -1; 4381 return 0; 4382 } 4383 4384 /* 4385 * defrag a xattr bucket if we find that the bucket has some 4386 * holes beteen name/value pairs. 4387 * We will move all the name/value pairs to the end of the bucket 4388 * so that we can spare some space for insertion. 4389 */ 4390 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4391 handle_t *handle, 4392 struct ocfs2_xattr_bucket *bucket) 4393 { 4394 int ret, i; 4395 size_t end, offset, len; 4396 struct ocfs2_xattr_header *xh; 4397 char *entries, *buf, *bucket_buf = NULL; 4398 u64 blkno = bucket_blkno(bucket); 4399 u16 xh_free_start; 4400 size_t blocksize = inode->i_sb->s_blocksize; 4401 struct ocfs2_xattr_entry *xe; 4402 4403 /* 4404 * In order to make the operation more efficient and generic, 4405 * we copy all the blocks into a contiguous memory and do the 4406 * defragment there, so if anything is error, we will not touch 4407 * the real block. 
4408 */ 4409 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4410 if (!bucket_buf) { 4411 ret = -EIO; 4412 goto out; 4413 } 4414 4415 buf = bucket_buf; 4416 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4417 memcpy(buf, bucket_block(bucket, i), blocksize); 4418 4419 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4420 OCFS2_JOURNAL_ACCESS_WRITE); 4421 if (ret < 0) { 4422 mlog_errno(ret); 4423 goto out; 4424 } 4425 4426 xh = (struct ocfs2_xattr_header *)bucket_buf; 4427 entries = (char *)xh->xh_entries; 4428 xh_free_start = le16_to_cpu(xh->xh_free_start); 4429 4430 trace_ocfs2_defrag_xattr_bucket( 4431 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4432 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4433 4434 /* 4435 * sort all the entries by their offset. 4436 * the largest will be the first, so that we can 4437 * move them to the end one by one. 4438 */ 4439 sort(entries, le16_to_cpu(xh->xh_count), 4440 sizeof(struct ocfs2_xattr_entry), 4441 cmp_xe_offset, swap_xe); 4442 4443 /* Move all name/values to the end of the bucket. */ 4444 xe = xh->xh_entries; 4445 end = OCFS2_XATTR_BUCKET_SIZE; 4446 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4447 offset = le16_to_cpu(xe->xe_name_offset); 4448 len = namevalue_size_xe(xe); 4449 4450 /* 4451 * We must make sure that the name/value pair 4452 * exist in the same block. So adjust end to 4453 * the previous block end if needed. 
4454 */ 4455 if (((end - len) / blocksize != 4456 (end - 1) / blocksize)) 4457 end = end - end % blocksize; 4458 4459 if (end > offset + len) { 4460 memmove(bucket_buf + end - len, 4461 bucket_buf + offset, len); 4462 xe->xe_name_offset = cpu_to_le16(end - len); 4463 } 4464 4465 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4466 "bucket %llu\n", (unsigned long long)blkno); 4467 4468 end -= len; 4469 } 4470 4471 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4472 "bucket %llu\n", (unsigned long long)blkno); 4473 4474 if (xh_free_start == end) 4475 goto out; 4476 4477 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4478 xh->xh_free_start = cpu_to_le16(end); 4479 4480 /* sort the entries by their name_hash. */ 4481 sort(entries, le16_to_cpu(xh->xh_count), 4482 sizeof(struct ocfs2_xattr_entry), 4483 cmp_xe, swap_xe); 4484 4485 buf = bucket_buf; 4486 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4487 memcpy(bucket_block(bucket, i), buf, blocksize); 4488 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4489 4490 out: 4491 kfree(bucket_buf); 4492 return ret; 4493 } 4494 4495 /* 4496 * prev_blkno points to the start of an existing extent. new_blkno 4497 * points to a newly allocated extent. Because we know each of our 4498 * clusters contains more than bucket, we can easily split one cluster 4499 * at a bucket boundary. So we take the last cluster of the existing 4500 * extent and split it down the middle. We move the last half of the 4501 * buckets in the last cluster of the existing extent over to the new 4502 * extent. 4503 * 4504 * first_bh is the buffer at prev_blkno so we can update the existing 4505 * extent's bucket count. header_bh is the bucket were we were hoping 4506 * to insert our xattr. If the bucket move places the target in the new 4507 * extent, we'll update first_bh and header_bh after modifying the old 4508 * extent. 
4509 * 4510 * first_hash will be set as the 1st xe's name_hash in the new extent. 4511 */ 4512 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 4513 handle_t *handle, 4514 struct ocfs2_xattr_bucket *first, 4515 struct ocfs2_xattr_bucket *target, 4516 u64 new_blkno, 4517 u32 num_clusters, 4518 u32 *first_hash) 4519 { 4520 int ret; 4521 struct super_block *sb = inode->i_sb; 4522 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); 4523 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); 4524 int to_move = num_buckets / 2; 4525 u64 src_blkno; 4526 u64 last_cluster_blkno = bucket_blkno(first) + 4527 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); 4528 4529 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4530 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4531 4532 trace_ocfs2_mv_xattr_bucket_cross_cluster( 4533 (unsigned long long)last_cluster_blkno, 4534 (unsigned long long)new_blkno); 4535 4536 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4537 last_cluster_blkno, new_blkno, 4538 to_move, first_hash); 4539 if (ret) { 4540 mlog_errno(ret); 4541 goto out; 4542 } 4543 4544 /* This is the first bucket that got moved */ 4545 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); 4546 4547 /* 4548 * If the target bucket was part of the moved buckets, we need to 4549 * update first and target. 4550 */ 4551 if (bucket_blkno(target) >= src_blkno) { 4552 /* Find the block for the new target bucket */ 4553 src_blkno = new_blkno + 4554 (bucket_blkno(target) - src_blkno); 4555 4556 ocfs2_xattr_bucket_relse(first); 4557 ocfs2_xattr_bucket_relse(target); 4558 4559 /* 4560 * These shouldn't fail - the buffers are in the 4561 * journal from ocfs2_cp_xattr_bucket(). 
4562 */ 4563 ret = ocfs2_read_xattr_bucket(first, new_blkno); 4564 if (ret) { 4565 mlog_errno(ret); 4566 goto out; 4567 } 4568 ret = ocfs2_read_xattr_bucket(target, src_blkno); 4569 if (ret) 4570 mlog_errno(ret); 4571 4572 } 4573 4574 out: 4575 return ret; 4576 } 4577 4578 /* 4579 * Find the suitable pos when we divide a bucket into 2. 4580 * We have to make sure the xattrs with the same hash value exist 4581 * in the same bucket. 4582 * 4583 * If this ocfs2_xattr_header covers more than one hash value, find a 4584 * place where the hash value changes. Try to find the most even split. 4585 * The most common case is that all entries have different hash values, 4586 * and the first check we make will find a place to split. 4587 */ 4588 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) 4589 { 4590 struct ocfs2_xattr_entry *entries = xh->xh_entries; 4591 int count = le16_to_cpu(xh->xh_count); 4592 int delta, middle = count / 2; 4593 4594 /* 4595 * We start at the middle. Each step gets farther away in both 4596 * directions. We therefore hit the change in hash value 4597 * nearest to the middle. Note that this loop does not execute for 4598 * count < 2. 4599 */ 4600 for (delta = 0; delta < middle; delta++) { 4601 /* Let's check delta earlier than middle */ 4602 if (cmp_xe(&entries[middle - delta - 1], 4603 &entries[middle - delta])) 4604 return middle - delta; 4605 4606 /* For even counts, don't walk off the end */ 4607 if ((middle + delta + 1) == count) 4608 continue; 4609 4610 /* Now try delta past middle */ 4611 if (cmp_xe(&entries[middle + delta], 4612 &entries[middle + delta + 1])) 4613 return middle + delta + 1; 4614 } 4615 4616 /* Every entry had the same hash */ 4617 return count; 4618 } 4619 4620 /* 4621 * Move some xattrs in old bucket(blk) to new bucket(new_blk). 4622 * first_hash will record the 1st hash of the new bucket. 4623 * 4624 * Normally half of the xattrs will be moved. 
But we have to make 4625 * sure that the xattrs with the same hash value are stored in the 4626 * same bucket. If all the xattrs in this bucket have the same hash 4627 * value, the new bucket will be initialized as an empty one and the 4628 * first_hash will be initialized as (hash_value+1). 4629 */ 4630 static int ocfs2_divide_xattr_bucket(struct inode *inode, 4631 handle_t *handle, 4632 u64 blk, 4633 u64 new_blk, 4634 u32 *first_hash, 4635 int new_bucket_head) 4636 { 4637 int ret, i; 4638 int count, start, len, name_value_len = 0, name_offset = 0; 4639 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4640 struct ocfs2_xattr_header *xh; 4641 struct ocfs2_xattr_entry *xe; 4642 int blocksize = inode->i_sb->s_blocksize; 4643 4644 trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk, 4645 (unsigned long long)new_blk); 4646 4647 s_bucket = ocfs2_xattr_bucket_new(inode); 4648 t_bucket = ocfs2_xattr_bucket_new(inode); 4649 if (!s_bucket || !t_bucket) { 4650 ret = -ENOMEM; 4651 mlog_errno(ret); 4652 goto out; 4653 } 4654 4655 ret = ocfs2_read_xattr_bucket(s_bucket, blk); 4656 if (ret) { 4657 mlog_errno(ret); 4658 goto out; 4659 } 4660 4661 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, 4662 OCFS2_JOURNAL_ACCESS_WRITE); 4663 if (ret) { 4664 mlog_errno(ret); 4665 goto out; 4666 } 4667 4668 /* 4669 * Even if !new_bucket_head, we're overwriting t_bucket. Thus, 4670 * there's no need to read it. 4671 */ 4672 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head); 4673 if (ret) { 4674 mlog_errno(ret); 4675 goto out; 4676 } 4677 4678 /* 4679 * Hey, if we're overwriting t_bucket, what difference does 4680 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the 4681 * same part of ocfs2_cp_xattr_bucket(). 4682 */ 4683 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4684 new_bucket_head ? 
4685 OCFS2_JOURNAL_ACCESS_CREATE : 4686 OCFS2_JOURNAL_ACCESS_WRITE); 4687 if (ret) { 4688 mlog_errno(ret); 4689 goto out; 4690 } 4691 4692 xh = bucket_xh(s_bucket); 4693 count = le16_to_cpu(xh->xh_count); 4694 start = ocfs2_xattr_find_divide_pos(xh); 4695 4696 if (start == count) { 4697 xe = &xh->xh_entries[start-1]; 4698 4699 /* 4700 * initialized a new empty bucket here. 4701 * The hash value is set as one larger than 4702 * that of the last entry in the previous bucket. 4703 */ 4704 for (i = 0; i < t_bucket->bu_blocks; i++) 4705 memset(bucket_block(t_bucket, i), 0, blocksize); 4706 4707 xh = bucket_xh(t_bucket); 4708 xh->xh_free_start = cpu_to_le16(blocksize); 4709 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 4710 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 4711 4712 goto set_num_buckets; 4713 } 4714 4715 /* copy the whole bucket to the new first. */ 4716 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4717 4718 /* update the new bucket. */ 4719 xh = bucket_xh(t_bucket); 4720 4721 /* 4722 * Calculate the total name/value len and xh_free_start for 4723 * the old bucket first. 4724 */ 4725 name_offset = OCFS2_XATTR_BUCKET_SIZE; 4726 name_value_len = 0; 4727 for (i = 0; i < start; i++) { 4728 xe = &xh->xh_entries[i]; 4729 name_value_len += namevalue_size_xe(xe); 4730 if (le16_to_cpu(xe->xe_name_offset) < name_offset) 4731 name_offset = le16_to_cpu(xe->xe_name_offset); 4732 } 4733 4734 /* 4735 * Now begin the modification to the new bucket. 4736 * 4737 * In the new bucket, We just move the xattr entry to the beginning 4738 * and don't touch the name/value. So there will be some holes in the 4739 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is 4740 * called. 
4741 */ 4742 xe = &xh->xh_entries[start]; 4743 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4744 trace_ocfs2_divide_xattr_bucket_move(len, 4745 (int)((char *)xe - (char *)xh), 4746 (int)((char *)xh->xh_entries - (char *)xh)); 4747 memmove((char *)xh->xh_entries, (char *)xe, len); 4748 xe = &xh->xh_entries[count - start]; 4749 len = sizeof(struct ocfs2_xattr_entry) * start; 4750 memset((char *)xe, 0, len); 4751 4752 le16_add_cpu(&xh->xh_count, -start); 4753 le16_add_cpu(&xh->xh_name_value_len, -name_value_len); 4754 4755 /* Calculate xh_free_start for the new bucket. */ 4756 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 4757 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 4758 xe = &xh->xh_entries[i]; 4759 if (le16_to_cpu(xe->xe_name_offset) < 4760 le16_to_cpu(xh->xh_free_start)) 4761 xh->xh_free_start = xe->xe_name_offset; 4762 } 4763 4764 set_num_buckets: 4765 /* set xh->xh_num_buckets for the new xh. */ 4766 if (new_bucket_head) 4767 xh->xh_num_buckets = cpu_to_le16(1); 4768 else 4769 xh->xh_num_buckets = 0; 4770 4771 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4772 4773 /* store the first_hash of the new bucket. */ 4774 if (first_hash) 4775 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4776 4777 /* 4778 * Now only update the 1st block of the old bucket. If we 4779 * just added a new empty bucket, there is no need to modify 4780 * it. 4781 */ 4782 if (start == count) 4783 goto out; 4784 4785 xh = bucket_xh(s_bucket); 4786 memset(&xh->xh_entries[start], 0, 4787 sizeof(struct ocfs2_xattr_entry) * (count - start)); 4788 xh->xh_count = cpu_to_le16(start); 4789 xh->xh_free_start = cpu_to_le16(name_offset); 4790 xh->xh_name_value_len = cpu_to_le16(name_value_len); 4791 4792 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); 4793 4794 out: 4795 ocfs2_xattr_bucket_free(s_bucket); 4796 ocfs2_xattr_bucket_free(t_bucket); 4797 4798 return ret; 4799 } 4800 4801 /* 4802 * Copy xattr from one bucket to another bucket. 
4803 * 4804 * The caller must make sure that the journal transaction 4805 * has enough space for journaling. 4806 */ 4807 static int ocfs2_cp_xattr_bucket(struct inode *inode, 4808 handle_t *handle, 4809 u64 s_blkno, 4810 u64 t_blkno, 4811 int t_is_new) 4812 { 4813 int ret; 4814 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4815 4816 BUG_ON(s_blkno == t_blkno); 4817 4818 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno, 4819 (unsigned long long)t_blkno, 4820 t_is_new); 4821 4822 s_bucket = ocfs2_xattr_bucket_new(inode); 4823 t_bucket = ocfs2_xattr_bucket_new(inode); 4824 if (!s_bucket || !t_bucket) { 4825 ret = -ENOMEM; 4826 mlog_errno(ret); 4827 goto out; 4828 } 4829 4830 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); 4831 if (ret) 4832 goto out; 4833 4834 /* 4835 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4836 * there's no need to read it. 4837 */ 4838 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new); 4839 if (ret) 4840 goto out; 4841 4842 /* 4843 * Hey, if we're overwriting t_bucket, what difference does 4844 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new 4845 * cluster to fill, we came here from 4846 * ocfs2_mv_xattr_buckets(), and it is really new - 4847 * ACCESS_CREATE is required. But we also might have moved data 4848 * out of t_bucket before extending back into it. 4849 * ocfs2_add_new_xattr_bucket() can do this - its call to 4850 * ocfs2_add_new_xattr_cluster() may have created a new extent 4851 * and copied out the end of the old extent. Then it re-extends 4852 * the old extent back to create space for new xattrs. That's 4853 * how we get here, and the bucket isn't really new. 4854 */ 4855 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4856 t_is_new ? 
4857 OCFS2_JOURNAL_ACCESS_CREATE : 4858 OCFS2_JOURNAL_ACCESS_WRITE); 4859 if (ret) 4860 goto out; 4861 4862 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4863 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4864 4865 out: 4866 ocfs2_xattr_bucket_free(t_bucket); 4867 ocfs2_xattr_bucket_free(s_bucket); 4868 4869 return ret; 4870 } 4871 4872 /* 4873 * src_blk points to the start of an existing extent. last_blk points to 4874 * last cluster in that extent. to_blk points to a newly allocated 4875 * extent. We copy the buckets from the cluster at last_blk to the new 4876 * extent. If start_bucket is non-zero, we skip that many buckets before 4877 * we start copying. The new extent's xh_num_buckets gets set to the 4878 * number of buckets we copied. The old extent's xh_num_buckets shrinks 4879 * by the same amount. 4880 */ 4881 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 4882 u64 src_blk, u64 last_blk, u64 to_blk, 4883 unsigned int start_bucket, 4884 u32 *first_hash) 4885 { 4886 int i, ret, credits; 4887 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4888 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4889 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4890 struct ocfs2_xattr_bucket *old_first, *new_first; 4891 4892 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk, 4893 (unsigned long long)to_blk); 4894 4895 BUG_ON(start_bucket >= num_buckets); 4896 if (start_bucket) { 4897 num_buckets -= start_bucket; 4898 last_blk += (start_bucket * blks_per_bucket); 4899 } 4900 4901 /* The first bucket of the original extent */ 4902 old_first = ocfs2_xattr_bucket_new(inode); 4903 /* The first bucket of the new extent */ 4904 new_first = ocfs2_xattr_bucket_new(inode); 4905 if (!old_first || !new_first) { 4906 ret = -ENOMEM; 4907 mlog_errno(ret); 4908 goto out; 4909 } 4910 4911 ret = ocfs2_read_xattr_bucket(old_first, src_blk); 4912 if (ret) { 4913 mlog_errno(ret); 4914 goto out; 4915 } 4916 4917 /* 4918 * We 
need to update the first bucket of the old extent and all 4919 * the buckets going to the new extent. 4920 */ 4921 credits = ((num_buckets + 1) * blks_per_bucket); 4922 ret = ocfs2_extend_trans(handle, credits); 4923 if (ret) { 4924 mlog_errno(ret); 4925 goto out; 4926 } 4927 4928 ret = ocfs2_xattr_bucket_journal_access(handle, old_first, 4929 OCFS2_JOURNAL_ACCESS_WRITE); 4930 if (ret) { 4931 mlog_errno(ret); 4932 goto out; 4933 } 4934 4935 for (i = 0; i < num_buckets; i++) { 4936 ret = ocfs2_cp_xattr_bucket(inode, handle, 4937 last_blk + (i * blks_per_bucket), 4938 to_blk + (i * blks_per_bucket), 4939 1); 4940 if (ret) { 4941 mlog_errno(ret); 4942 goto out; 4943 } 4944 } 4945 4946 /* 4947 * Get the new bucket ready before we dirty anything 4948 * (This actually shouldn't fail, because we already dirtied 4949 * it once in ocfs2_cp_xattr_bucket()). 4950 */ 4951 ret = ocfs2_read_xattr_bucket(new_first, to_blk); 4952 if (ret) { 4953 mlog_errno(ret); 4954 goto out; 4955 } 4956 ret = ocfs2_xattr_bucket_journal_access(handle, new_first, 4957 OCFS2_JOURNAL_ACCESS_WRITE); 4958 if (ret) { 4959 mlog_errno(ret); 4960 goto out; 4961 } 4962 4963 /* Now update the headers */ 4964 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); 4965 ocfs2_xattr_bucket_journal_dirty(handle, old_first); 4966 4967 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); 4968 ocfs2_xattr_bucket_journal_dirty(handle, new_first); 4969 4970 if (first_hash) 4971 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); 4972 4973 out: 4974 ocfs2_xattr_bucket_free(new_first); 4975 ocfs2_xattr_bucket_free(old_first); 4976 return ret; 4977 } 4978 4979 /* 4980 * Move some xattrs in this cluster to the new cluster. 4981 * This function should only be called when bucket size == cluster size. 4982 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 
4983 */ 4984 static int ocfs2_divide_xattr_cluster(struct inode *inode, 4985 handle_t *handle, 4986 u64 prev_blk, 4987 u64 new_blk, 4988 u32 *first_hash) 4989 { 4990 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4991 int ret, credits = 2 * blk_per_bucket; 4992 4993 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4994 4995 ret = ocfs2_extend_trans(handle, credits); 4996 if (ret) { 4997 mlog_errno(ret); 4998 return ret; 4999 } 5000 5001 /* Move half of the xattr in start_blk to the next bucket. */ 5002 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, 5003 new_blk, first_hash, 1); 5004 } 5005 5006 /* 5007 * Move some xattrs from the old cluster to the new one since they are not 5008 * contiguous in ocfs2 xattr tree. 5009 * 5010 * new_blk starts a new separate cluster, and we will move some xattrs from 5011 * prev_blk to it. v_start will be set as the first name hash value in this 5012 * new cluster so that it can be used as e_cpos during tree insertion and 5013 * don't collide with our original b-tree operations. first_bh and header_bh 5014 * will also be updated since they will be used in ocfs2_extend_xattr_bucket 5015 * to extend the insert bucket. 5016 * 5017 * The problem is how much xattr should we move to the new one and when should 5018 * we update first_bh and header_bh? 5019 * 1. If cluster size > bucket size, that means the previous cluster has more 5020 * than 1 bucket, so just move half nums of bucket into the new cluster and 5021 * update the first_bh and header_bh if the insert bucket has been moved 5022 * to the new cluster. 5023 * 2. If cluster_size == bucket_size: 5024 * a) If the previous extent rec has more than one cluster and the insert 5025 * place isn't in the last cluster, copy the entire last cluster to the 5026 * new one. This time, we don't need to upate the first_bh and header_bh 5027 * since they will not be moved into the new cluster. 
5028 * b) Otherwise, move the bottom half of the xattrs in the last cluster into 5029 * the new one. And we set the extend flag to zero if the insert place is 5030 * moved into the new allocated cluster since no extend is needed. 5031 */ 5032 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 5033 handle_t *handle, 5034 struct ocfs2_xattr_bucket *first, 5035 struct ocfs2_xattr_bucket *target, 5036 u64 new_blk, 5037 u32 prev_clusters, 5038 u32 *v_start, 5039 int *extend) 5040 { 5041 int ret; 5042 5043 trace_ocfs2_adjust_xattr_cross_cluster( 5044 (unsigned long long)bucket_blkno(first), 5045 (unsigned long long)new_blk, prev_clusters); 5046 5047 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5048 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5049 handle, 5050 first, target, 5051 new_blk, 5052 prev_clusters, 5053 v_start); 5054 if (ret) 5055 mlog_errno(ret); 5056 } else { 5057 /* The start of the last cluster in the first extent */ 5058 u64 last_blk = bucket_blkno(first) + 5059 ((prev_clusters - 1) * 5060 ocfs2_clusters_to_blocks(inode->i_sb, 1)); 5061 5062 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { 5063 ret = ocfs2_mv_xattr_buckets(inode, handle, 5064 bucket_blkno(first), 5065 last_blk, new_blk, 0, 5066 v_start); 5067 if (ret) 5068 mlog_errno(ret); 5069 } else { 5070 ret = ocfs2_divide_xattr_cluster(inode, handle, 5071 last_blk, new_blk, 5072 v_start); 5073 if (ret) 5074 mlog_errno(ret); 5075 5076 if ((bucket_blkno(target) == last_blk) && extend) 5077 *extend = 0; 5078 } 5079 } 5080 5081 return ret; 5082 } 5083 5084 /* 5085 * Add a new cluster for xattr storage. 5086 * 5087 * If the new cluster is contiguous with the previous one, it will be 5088 * appended to the same extent record, and num_clusters will be updated. 5089 * If not, we will insert a new extent for it and move some xattrs in 5090 * the last cluster into the new allocated one. 
5091 * We also need to limit the maximum size of a btree leaf, otherwise we'll 5092 * lose the benefits of hashing because we'll have to search large leaves. 5093 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, 5094 * if it's bigger). 5095 * 5096 * first_bh is the first block of the previous extent rec and header_bh 5097 * indicates the bucket we will insert the new xattrs. They will be updated 5098 * when the header_bh is moved into the new cluster. 5099 */ 5100 static int ocfs2_add_new_xattr_cluster(struct inode *inode, 5101 struct buffer_head *root_bh, 5102 struct ocfs2_xattr_bucket *first, 5103 struct ocfs2_xattr_bucket *target, 5104 u32 *num_clusters, 5105 u32 prev_cpos, 5106 int *extend, 5107 struct ocfs2_xattr_set_ctxt *ctxt) 5108 { 5109 int ret; 5110 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 5111 u32 prev_clusters = *num_clusters; 5112 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 5113 u64 block; 5114 handle_t *handle = ctxt->handle; 5115 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5116 struct ocfs2_extent_tree et; 5117 5118 trace_ocfs2_add_new_xattr_cluster_begin( 5119 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5120 (unsigned long long)bucket_blkno(first), 5121 prev_cpos, prev_clusters); 5122 5123 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5124 5125 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5126 OCFS2_JOURNAL_ACCESS_WRITE); 5127 if (ret < 0) { 5128 mlog_errno(ret); 5129 goto leave; 5130 } 5131 5132 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1, 5133 clusters_to_add, &bit_off, &num_bits); 5134 if (ret < 0) { 5135 if (ret != -ENOSPC) 5136 mlog_errno(ret); 5137 goto leave; 5138 } 5139 5140 BUG_ON(num_bits > clusters_to_add); 5141 5142 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5143 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits); 5144 5145 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5146 
(prev_clusters + num_bits) << osb->s_clustersize_bits <= 5147 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 5148 /* 5149 * If this cluster is contiguous with the old one and 5150 * adding this new cluster, we don't surpass the limit of 5151 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be 5152 * initialized and used like other buckets in the previous 5153 * cluster. 5154 * So add it as a contiguous one. The caller will handle 5155 * its init process. 5156 */ 5157 v_start = prev_cpos + prev_clusters; 5158 *num_clusters = prev_clusters + num_bits; 5159 } else { 5160 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5161 handle, 5162 first, 5163 target, 5164 block, 5165 prev_clusters, 5166 &v_start, 5167 extend); 5168 if (ret) { 5169 mlog_errno(ret); 5170 goto leave; 5171 } 5172 } 5173 5174 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block, 5175 v_start, num_bits); 5176 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5177 num_bits, 0, ctxt->meta_ac); 5178 if (ret < 0) { 5179 mlog_errno(ret); 5180 goto leave; 5181 } 5182 5183 ocfs2_journal_dirty(handle, root_bh); 5184 5185 leave: 5186 return ret; 5187 } 5188 5189 /* 5190 * We are given an extent. 'first' is the bucket at the very front of 5191 * the extent. The extent has space for an additional bucket past 5192 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number 5193 * of the target bucket. We wish to shift every bucket past the target 5194 * down one, filling in that additional space. When we get back to the 5195 * target, we split the target between itself and the now-empty bucket 5196 * at target+1 (aka, target_blkno + blks_per_bucket). 
5197 */ 5198 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5199 handle_t *handle, 5200 struct ocfs2_xattr_bucket *first, 5201 u64 target_blk, 5202 u32 num_clusters) 5203 { 5204 int ret, credits; 5205 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5206 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5207 u64 end_blk; 5208 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5209 5210 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk, 5211 (unsigned long long)bucket_blkno(first), 5212 num_clusters, new_bucket); 5213 5214 /* The extent must have room for an additional bucket */ 5215 BUG_ON(new_bucket >= 5216 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5217 5218 /* end_blk points to the last existing bucket */ 5219 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5220 5221 /* 5222 * end_blk is the start of the last existing bucket. 5223 * Thus, (end_blk - target_blk) covers the target bucket and 5224 * every bucket after it up to, but not including, the last 5225 * existing bucket. Then we add the last existing bucket, the 5226 * new bucket, and the first bucket (3 * blk_per_bucket). 5227 */ 5228 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5229 ret = ocfs2_extend_trans(handle, credits); 5230 if (ret) { 5231 mlog_errno(ret); 5232 goto out; 5233 } 5234 5235 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5236 OCFS2_JOURNAL_ACCESS_WRITE); 5237 if (ret) { 5238 mlog_errno(ret); 5239 goto out; 5240 } 5241 5242 while (end_blk != target_blk) { 5243 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5244 end_blk + blk_per_bucket, 0); 5245 if (ret) 5246 goto out; 5247 end_blk -= blk_per_bucket; 5248 } 5249 5250 /* Move half of the xattr in target_blkno to the next bucket. 
*/ 5251 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5252 target_blk + blk_per_bucket, NULL, 0); 5253 5254 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5255 ocfs2_xattr_bucket_journal_dirty(handle, first); 5256 5257 out: 5258 return ret; 5259 } 5260 5261 /* 5262 * Add new xattr bucket in an extent record and adjust the buckets 5263 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5264 * bucket we want to insert into. 5265 * 5266 * In the easy case, we will move all the buckets after target down by 5267 * one. Half of target's xattrs will be moved to the next bucket. 5268 * 5269 * If current cluster is full, we'll allocate a new one. This may not 5270 * be contiguous. The underlying calls will make sure that there is 5271 * space for the insert, shifting buckets around if necessary. 5272 * 'target' may be moved by those calls. 5273 */ 5274 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5275 struct buffer_head *xb_bh, 5276 struct ocfs2_xattr_bucket *target, 5277 struct ocfs2_xattr_set_ctxt *ctxt) 5278 { 5279 struct ocfs2_xattr_block *xb = 5280 (struct ocfs2_xattr_block *)xb_bh->b_data; 5281 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5282 struct ocfs2_extent_list *el = &xb_root->xt_list; 5283 u32 name_hash = 5284 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5285 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5286 int ret, num_buckets, extend = 1; 5287 u64 p_blkno; 5288 u32 e_cpos, num_clusters; 5289 /* The bucket at the front of the extent */ 5290 struct ocfs2_xattr_bucket *first; 5291 5292 trace_ocfs2_add_new_xattr_bucket( 5293 (unsigned long long)bucket_blkno(target)); 5294 5295 /* The first bucket of the original extent */ 5296 first = ocfs2_xattr_bucket_new(inode); 5297 if (!first) { 5298 ret = -ENOMEM; 5299 mlog_errno(ret); 5300 goto out; 5301 } 5302 5303 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5304 &num_clusters, el); 5305 if (ret) { 5306 mlog_errno(ret); 
5307 goto out; 5308 } 5309 5310 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5311 if (ret) { 5312 mlog_errno(ret); 5313 goto out; 5314 } 5315 5316 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5317 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5318 /* 5319 * This can move first+target if the target bucket moves 5320 * to the new extent. 5321 */ 5322 ret = ocfs2_add_new_xattr_cluster(inode, 5323 xb_bh, 5324 first, 5325 target, 5326 &num_clusters, 5327 e_cpos, 5328 &extend, 5329 ctxt); 5330 if (ret) { 5331 mlog_errno(ret); 5332 goto out; 5333 } 5334 } 5335 5336 if (extend) { 5337 ret = ocfs2_extend_xattr_bucket(inode, 5338 ctxt->handle, 5339 first, 5340 bucket_blkno(target), 5341 num_clusters); 5342 if (ret) 5343 mlog_errno(ret); 5344 } 5345 5346 out: 5347 ocfs2_xattr_bucket_free(first); 5348 5349 return ret; 5350 } 5351 5352 /* 5353 * Truncate the specified xe_off entry in xattr bucket. 5354 * bucket is indicated by header_bh and len is the new length. 5355 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5356 * 5357 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 5358 */ 5359 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5360 struct ocfs2_xattr_bucket *bucket, 5361 int xe_off, 5362 int len, 5363 struct ocfs2_xattr_set_ctxt *ctxt) 5364 { 5365 int ret, offset; 5366 u64 value_blk; 5367 struct ocfs2_xattr_entry *xe; 5368 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5369 size_t blocksize = inode->i_sb->s_blocksize; 5370 struct ocfs2_xattr_value_buf vb = { 5371 .vb_access = ocfs2_journal_access, 5372 }; 5373 5374 xe = &xh->xh_entries[xe_off]; 5375 5376 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5377 5378 offset = le16_to_cpu(xe->xe_name_offset) + 5379 OCFS2_XATTR_SIZE(xe->xe_name_len); 5380 5381 value_blk = offset / blocksize; 5382 5383 /* We don't allow ocfs2_xattr_value to be stored in different block. 
*/ 5384 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5385 5386 vb.vb_bh = bucket->bu_bhs[value_blk]; 5387 BUG_ON(!vb.vb_bh); 5388 5389 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5390 (vb.vb_bh->b_data + offset % blocksize); 5391 5392 /* 5393 * From here on out we have to dirty the bucket. The generic 5394 * value calls only modify one of the bucket's bhs, but we need 5395 * to send the bucket at once. So if they error, they *could* have 5396 * modified something. We have to assume they did, and dirty 5397 * the whole bucket. This leaves us in a consistent state. 5398 */ 5399 trace_ocfs2_xattr_bucket_value_truncate( 5400 (unsigned long long)bucket_blkno(bucket), xe_off, len); 5401 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5402 if (ret) { 5403 mlog_errno(ret); 5404 goto out; 5405 } 5406 5407 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5408 OCFS2_JOURNAL_ACCESS_WRITE); 5409 if (ret) { 5410 mlog_errno(ret); 5411 goto out; 5412 } 5413 5414 xe->xe_value_size = cpu_to_le64(len); 5415 5416 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5417 5418 out: 5419 return ret; 5420 } 5421 5422 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5423 struct buffer_head *root_bh, 5424 u64 blkno, 5425 u32 cpos, 5426 u32 len, 5427 void *para) 5428 { 5429 int ret; 5430 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5431 struct inode *tl_inode = osb->osb_tl_inode; 5432 handle_t *handle; 5433 struct ocfs2_xattr_block *xb = 5434 (struct ocfs2_xattr_block *)root_bh->b_data; 5435 struct ocfs2_alloc_context *meta_ac = NULL; 5436 struct ocfs2_cached_dealloc_ctxt dealloc; 5437 struct ocfs2_extent_tree et; 5438 5439 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5440 ocfs2_delete_xattr_in_bucket, para); 5441 if (ret) { 5442 mlog_errno(ret); 5443 return ret; 5444 } 5445 5446 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5447 5448 ocfs2_init_dealloc_ctxt(&dealloc); 5449 5450 
trace_ocfs2_rm_xattr_cluster( 5451 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5452 (unsigned long long)blkno, cpos, len); 5453 5454 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5455 len); 5456 5457 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5458 if (ret) { 5459 mlog_errno(ret); 5460 return ret; 5461 } 5462 5463 inode_lock(tl_inode); 5464 5465 if (ocfs2_truncate_log_needs_flush(osb)) { 5466 ret = __ocfs2_flush_truncate_log(osb); 5467 if (ret < 0) { 5468 mlog_errno(ret); 5469 goto out; 5470 } 5471 } 5472 5473 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5474 if (IS_ERR(handle)) { 5475 ret = -ENOMEM; 5476 mlog_errno(ret); 5477 goto out; 5478 } 5479 5480 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5481 OCFS2_JOURNAL_ACCESS_WRITE); 5482 if (ret) { 5483 mlog_errno(ret); 5484 goto out_commit; 5485 } 5486 5487 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5488 &dealloc); 5489 if (ret) { 5490 mlog_errno(ret); 5491 goto out_commit; 5492 } 5493 5494 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5495 ocfs2_journal_dirty(handle, root_bh); 5496 5497 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5498 if (ret) 5499 mlog_errno(ret); 5500 ocfs2_update_inode_fsync_trans(handle, inode, 0); 5501 5502 out_commit: 5503 ocfs2_commit_trans(osb, handle); 5504 out: 5505 ocfs2_schedule_truncate_log_flush(osb, 1); 5506 5507 inode_unlock(tl_inode); 5508 5509 if (meta_ac) 5510 ocfs2_free_alloc_context(meta_ac); 5511 5512 ocfs2_run_deallocs(osb, &dealloc); 5513 5514 return ret; 5515 } 5516 5517 /* 5518 * check whether the xattr bucket is filled up with the same hash value. 5519 * If we want to insert the xattr with the same hash, return -ENOSPC. 5520 * If we want to insert a xattr with different hash value, go ahead 5521 * and ocfs2_divide_xattr_bucket will handle this. 
5522 */ 5523 static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 5524 struct ocfs2_xattr_bucket *bucket, 5525 const char *name) 5526 { 5527 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5528 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5529 5530 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5531 return 0; 5532 5533 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 5534 xh->xh_entries[0].xe_name_hash) { 5535 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5536 "hash = %u\n", 5537 (unsigned long long)bucket_blkno(bucket), 5538 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5539 return -ENOSPC; 5540 } 5541 5542 return 0; 5543 } 5544 5545 /* 5546 * Try to set the entry in the current bucket. If we fail, the caller 5547 * will handle getting us another bucket. 5548 */ 5549 static int ocfs2_xattr_set_entry_bucket(struct inode *inode, 5550 struct ocfs2_xattr_info *xi, 5551 struct ocfs2_xattr_search *xs, 5552 struct ocfs2_xattr_set_ctxt *ctxt) 5553 { 5554 int ret; 5555 struct ocfs2_xa_loc loc; 5556 5557 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name); 5558 5559 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5560 xs->not_found ? NULL : xs->here); 5561 ret = ocfs2_xa_set(&loc, xi, ctxt); 5562 if (!ret) { 5563 xs->here = loc.xl_entry; 5564 goto out; 5565 } 5566 if (ret != -ENOSPC) { 5567 mlog_errno(ret); 5568 goto out; 5569 } 5570 5571 /* Ok, we need space. Let's try defragmenting the bucket. 
*/ 5572 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5573 xs->bucket); 5574 if (ret) { 5575 mlog_errno(ret); 5576 goto out; 5577 } 5578 5579 ret = ocfs2_xa_set(&loc, xi, ctxt); 5580 if (!ret) { 5581 xs->here = loc.xl_entry; 5582 goto out; 5583 } 5584 if (ret != -ENOSPC) 5585 mlog_errno(ret); 5586 5587 5588 out: 5589 return ret; 5590 } 5591 5592 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5593 struct ocfs2_xattr_info *xi, 5594 struct ocfs2_xattr_search *xs, 5595 struct ocfs2_xattr_set_ctxt *ctxt) 5596 { 5597 int ret; 5598 5599 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name); 5600 5601 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5602 if (!ret) 5603 goto out; 5604 if (ret != -ENOSPC) { 5605 mlog_errno(ret); 5606 goto out; 5607 } 5608 5609 /* Ack, need more space. Let's try to get another bucket! */ 5610 5611 /* 5612 * We do not allow for overlapping ranges between buckets. And 5613 * the maximum number of collisions we will allow for then is 5614 * one bucket's worth, so check it here whether we need to 5615 * add a new bucket for the insert. 5616 */ 5617 ret = ocfs2_check_xattr_bucket_collision(inode, 5618 xs->bucket, 5619 xi->xi_name); 5620 if (ret) { 5621 mlog_errno(ret); 5622 goto out; 5623 } 5624 5625 ret = ocfs2_add_new_xattr_bucket(inode, 5626 xs->xattr_bh, 5627 xs->bucket, 5628 ctxt); 5629 if (ret) { 5630 mlog_errno(ret); 5631 goto out; 5632 } 5633 5634 /* 5635 * ocfs2_add_new_xattr_bucket() will have updated 5636 * xs->bucket if it moved, but it will not have updated 5637 * any of the other search fields. Thus, we drop it and 5638 * re-search. Everything should be cached, so it'll be 5639 * quick. 
5640 */ 5641 ocfs2_xattr_bucket_relse(xs->bucket); 5642 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5643 xi->xi_name_index, 5644 xi->xi_name, xs); 5645 if (ret && ret != -ENODATA) 5646 goto out; 5647 xs->not_found = ret; 5648 5649 /* Ok, we have a new bucket, let's try again */ 5650 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5651 if (ret && (ret != -ENOSPC)) 5652 mlog_errno(ret); 5653 5654 out: 5655 return ret; 5656 } 5657 5658 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5659 struct ocfs2_xattr_bucket *bucket, 5660 void *para) 5661 { 5662 int ret = 0, ref_credits; 5663 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5664 u16 i; 5665 struct ocfs2_xattr_entry *xe; 5666 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5667 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5668 int credits = ocfs2_remove_extent_credits(osb->sb) + 5669 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5670 struct ocfs2_xattr_value_root *xv; 5671 struct ocfs2_rm_xattr_bucket_para *args = 5672 (struct ocfs2_rm_xattr_bucket_para *)para; 5673 5674 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5675 5676 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5677 xe = &xh->xh_entries[i]; 5678 if (ocfs2_xattr_is_local(xe)) 5679 continue; 5680 5681 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5682 i, &xv, NULL); 5683 if (ret) { 5684 mlog_errno(ret); 5685 break; 5686 } 5687 5688 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5689 args->ref_ci, 5690 args->ref_root_bh, 5691 &ctxt.meta_ac, 5692 &ref_credits); 5693 5694 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5695 if (IS_ERR(ctxt.handle)) { 5696 ret = PTR_ERR(ctxt.handle); 5697 mlog_errno(ret); 5698 break; 5699 } 5700 5701 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5702 i, 0, &ctxt); 5703 5704 ocfs2_commit_trans(osb, ctxt.handle); 5705 if (ctxt.meta_ac) { 5706 ocfs2_free_alloc_context(ctxt.meta_ac); 5707 ctxt.meta_ac = NULL; 5708 } 5709 if (ret) { 5710 mlog_errno(ret); 
5711 break; 5712 } 5713 } 5714 5715 if (ctxt.meta_ac) 5716 ocfs2_free_alloc_context(ctxt.meta_ac); 5717 ocfs2_schedule_truncate_log_flush(osb, 1); 5718 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5719 return ret; 5720 } 5721 5722 /* 5723 * Whenever we modify a xattr value root in the bucket(e.g, CoW 5724 * or change the extent record flag), we need to recalculate 5725 * the metaecc for the whole bucket. So it is done here. 5726 * 5727 * Note: 5728 * We have to give the extra credits for the caller. 5729 */ 5730 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, 5731 handle_t *handle, 5732 void *para) 5733 { 5734 int ret; 5735 struct ocfs2_xattr_bucket *bucket = 5736 (struct ocfs2_xattr_bucket *)para; 5737 5738 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 5739 OCFS2_JOURNAL_ACCESS_WRITE); 5740 if (ret) { 5741 mlog_errno(ret); 5742 return ret; 5743 } 5744 5745 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 5746 5747 return 0; 5748 } 5749 5750 /* 5751 * Special action we need if the xattr value is refcounted. 5752 * 5753 * 1. If the xattr is refcounted, lock the tree. 5754 * 2. CoW the xattr if we are setting the new value and the value 5755 * will be stored outside. 5756 * 3. In other case, decrease_refcount will work for us, so just 5757 * lock the refcount tree, calculate the meta and credits is OK. 5758 * 5759 * We have to do CoW before ocfs2_init_xattr_set_ctxt since 5760 * currently CoW is a completed transaction, while this function 5761 * will also lock the allocators and let us deadlock. So we will 5762 * CoW the whole xattr value. 
5763 */ 5764 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5765 struct ocfs2_dinode *di, 5766 struct ocfs2_xattr_info *xi, 5767 struct ocfs2_xattr_search *xis, 5768 struct ocfs2_xattr_search *xbs, 5769 struct ocfs2_refcount_tree **ref_tree, 5770 int *meta_add, 5771 int *credits) 5772 { 5773 int ret = 0; 5774 struct ocfs2_xattr_block *xb; 5775 struct ocfs2_xattr_entry *xe; 5776 char *base; 5777 u32 p_cluster, num_clusters; 5778 unsigned int ext_flags; 5779 int name_offset, name_len; 5780 struct ocfs2_xattr_value_buf vb; 5781 struct ocfs2_xattr_bucket *bucket = NULL; 5782 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5783 struct ocfs2_post_refcount refcount; 5784 struct ocfs2_post_refcount *p = NULL; 5785 struct buffer_head *ref_root_bh = NULL; 5786 5787 if (!xis->not_found) { 5788 xe = xis->here; 5789 name_offset = le16_to_cpu(xe->xe_name_offset); 5790 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5791 base = xis->base; 5792 vb.vb_bh = xis->inode_bh; 5793 vb.vb_access = ocfs2_journal_access_di; 5794 } else { 5795 int i, block_off = 0; 5796 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5797 xe = xbs->here; 5798 name_offset = le16_to_cpu(xe->xe_name_offset); 5799 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5800 i = xbs->here - xbs->header->xh_entries; 5801 5802 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5803 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5804 bucket_xh(xbs->bucket), 5805 i, &block_off, 5806 &name_offset); 5807 if (ret) { 5808 mlog_errno(ret); 5809 goto out; 5810 } 5811 base = bucket_block(xbs->bucket, block_off); 5812 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5813 vb.vb_access = ocfs2_journal_access; 5814 5815 if (ocfs2_meta_ecc(osb)) { 5816 /*create parameters for ocfs2_post_refcount. 
*/ 5817 bucket = xbs->bucket; 5818 refcount.credits = bucket->bu_blocks; 5819 refcount.para = bucket; 5820 refcount.func = 5821 ocfs2_xattr_bucket_post_refcount; 5822 p = &refcount; 5823 } 5824 } else { 5825 base = xbs->base; 5826 vb.vb_bh = xbs->xattr_bh; 5827 vb.vb_access = ocfs2_journal_access_xb; 5828 } 5829 } 5830 5831 if (ocfs2_xattr_is_local(xe)) 5832 goto out; 5833 5834 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5835 (base + name_offset + name_len); 5836 5837 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5838 &num_clusters, &vb.vb_xv->xr_list, 5839 &ext_flags); 5840 if (ret) { 5841 mlog_errno(ret); 5842 goto out; 5843 } 5844 5845 /* 5846 * We just need to check the 1st extent record, since we always 5847 * CoW the whole xattr. So there shouldn't be a xattr with 5848 * some REFCOUNT extent recs after the 1st one. 5849 */ 5850 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5851 goto out; 5852 5853 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5854 1, ref_tree, &ref_root_bh); 5855 if (ret) { 5856 mlog_errno(ret); 5857 goto out; 5858 } 5859 5860 /* 5861 * If we are deleting the xattr or the new size will be stored inside, 5862 * cool, leave it there, the xattr truncate process will remove them 5863 * for us(it still needs the refcount tree lock and the meta, credits). 5864 * And the worse case is that every cluster truncate will split the 5865 * refcount tree, and make the original extent become 3. So we will need 5866 * 2 * cluster more extent recs at most. 
5867 */ 5868 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5869 5870 ret = ocfs2_refcounted_xattr_delete_need(inode, 5871 &(*ref_tree)->rf_ci, 5872 ref_root_bh, vb.vb_xv, 5873 meta_add, credits); 5874 if (ret) 5875 mlog_errno(ret); 5876 goto out; 5877 } 5878 5879 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5880 *ref_tree, ref_root_bh, 0, 5881 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5882 if (ret) 5883 mlog_errno(ret); 5884 5885 out: 5886 brelse(ref_root_bh); 5887 return ret; 5888 } 5889 5890 /* 5891 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5892 * The physical clusters will be added to refcount tree. 5893 */ 5894 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5895 struct ocfs2_xattr_value_root *xv, 5896 struct ocfs2_extent_tree *value_et, 5897 struct ocfs2_caching_info *ref_ci, 5898 struct buffer_head *ref_root_bh, 5899 struct ocfs2_cached_dealloc_ctxt *dealloc, 5900 struct ocfs2_post_refcount *refcount) 5901 { 5902 int ret = 0; 5903 u32 clusters = le32_to_cpu(xv->xr_clusters); 5904 u32 cpos, p_cluster, num_clusters; 5905 struct ocfs2_extent_list *el = &xv->xr_list; 5906 unsigned int ext_flags; 5907 5908 cpos = 0; 5909 while (cpos < clusters) { 5910 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5911 &num_clusters, el, &ext_flags); 5912 if (ret) { 5913 mlog_errno(ret); 5914 break; 5915 } 5916 5917 cpos += num_clusters; 5918 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5919 continue; 5920 5921 BUG_ON(!p_cluster); 5922 5923 ret = ocfs2_add_refcount_flag(inode, value_et, 5924 ref_ci, ref_root_bh, 5925 cpos - num_clusters, 5926 p_cluster, num_clusters, 5927 dealloc, refcount); 5928 if (ret) { 5929 mlog_errno(ret); 5930 break; 5931 } 5932 } 5933 5934 return ret; 5935 } 5936 5937 /* 5938 * Given a normal ocfs2_xattr_header, refcount all the entries which 5939 * have value stored outside. 5940 * Used for xattrs stored in inode and ocfs2_xattr_block. 
5941 */ 5942 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5943 struct ocfs2_xattr_value_buf *vb, 5944 struct ocfs2_xattr_header *header, 5945 struct ocfs2_caching_info *ref_ci, 5946 struct buffer_head *ref_root_bh, 5947 struct ocfs2_cached_dealloc_ctxt *dealloc) 5948 { 5949 5950 struct ocfs2_xattr_entry *xe; 5951 struct ocfs2_xattr_value_root *xv; 5952 struct ocfs2_extent_tree et; 5953 int i, ret = 0; 5954 5955 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5956 xe = &header->xh_entries[i]; 5957 5958 if (ocfs2_xattr_is_local(xe)) 5959 continue; 5960 5961 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5962 le16_to_cpu(xe->xe_name_offset) + 5963 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5964 5965 vb->vb_xv = xv; 5966 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5967 5968 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 5969 ref_ci, ref_root_bh, 5970 dealloc, NULL); 5971 if (ret) { 5972 mlog_errno(ret); 5973 break; 5974 } 5975 } 5976 5977 return ret; 5978 } 5979 5980 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 5981 struct buffer_head *fe_bh, 5982 struct ocfs2_caching_info *ref_ci, 5983 struct buffer_head *ref_root_bh, 5984 struct ocfs2_cached_dealloc_ctxt *dealloc) 5985 { 5986 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 5987 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) 5988 (fe_bh->b_data + inode->i_sb->s_blocksize - 5989 le16_to_cpu(di->i_xattr_inline_size)); 5990 struct ocfs2_xattr_value_buf vb = { 5991 .vb_bh = fe_bh, 5992 .vb_access = ocfs2_journal_access_di, 5993 }; 5994 5995 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 5996 ref_ci, ref_root_bh, dealloc); 5997 } 5998 5999 struct ocfs2_xattr_tree_value_refcount_para { 6000 struct ocfs2_caching_info *ref_ci; 6001 struct buffer_head *ref_root_bh; 6002 struct ocfs2_cached_dealloc_ctxt *dealloc; 6003 }; 6004 6005 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 
6006 struct ocfs2_xattr_bucket *bucket, 6007 int offset, 6008 struct ocfs2_xattr_value_root **xv, 6009 struct buffer_head **bh) 6010 { 6011 int ret, block_off, name_offset; 6012 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 6013 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6014 void *base; 6015 6016 ret = ocfs2_xattr_bucket_get_name_value(sb, 6017 bucket_xh(bucket), 6018 offset, 6019 &block_off, 6020 &name_offset); 6021 if (ret) { 6022 mlog_errno(ret); 6023 goto out; 6024 } 6025 6026 base = bucket_block(bucket, block_off); 6027 6028 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6029 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6030 6031 if (bh) 6032 *bh = bucket->bu_bhs[block_off]; 6033 out: 6034 return ret; 6035 } 6036 6037 /* 6038 * For a given xattr bucket, refcount all the entries which 6039 * have value stored outside. 6040 */ 6041 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6042 struct ocfs2_xattr_bucket *bucket, 6043 void *para) 6044 { 6045 int i, ret = 0; 6046 struct ocfs2_extent_tree et; 6047 struct ocfs2_xattr_tree_value_refcount_para *ref = 6048 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6049 struct ocfs2_xattr_header *xh = 6050 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6051 struct ocfs2_xattr_entry *xe; 6052 struct ocfs2_xattr_value_buf vb = { 6053 .vb_access = ocfs2_journal_access, 6054 }; 6055 struct ocfs2_post_refcount refcount = { 6056 .credits = bucket->bu_blocks, 6057 .para = bucket, 6058 .func = ocfs2_xattr_bucket_post_refcount, 6059 }; 6060 struct ocfs2_post_refcount *p = NULL; 6061 6062 /* We only need post_refcount if we support metaecc. 
*/ 6063 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6064 p = &refcount; 6065 6066 trace_ocfs2_xattr_bucket_value_refcount( 6067 (unsigned long long)bucket_blkno(bucket), 6068 le16_to_cpu(xh->xh_count)); 6069 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6070 xe = &xh->xh_entries[i]; 6071 6072 if (ocfs2_xattr_is_local(xe)) 6073 continue; 6074 6075 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6076 &vb.vb_xv, &vb.vb_bh); 6077 if (ret) { 6078 mlog_errno(ret); 6079 break; 6080 } 6081 6082 ocfs2_init_xattr_value_extent_tree(&et, 6083 INODE_CACHE(inode), &vb); 6084 6085 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6086 &et, ref->ref_ci, 6087 ref->ref_root_bh, 6088 ref->dealloc, p); 6089 if (ret) { 6090 mlog_errno(ret); 6091 break; 6092 } 6093 } 6094 6095 return ret; 6096 6097 } 6098 6099 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6100 struct buffer_head *root_bh, 6101 u64 blkno, u32 cpos, u32 len, void *para) 6102 { 6103 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6104 ocfs2_xattr_bucket_value_refcount, 6105 para); 6106 } 6107 6108 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6109 struct buffer_head *blk_bh, 6110 struct ocfs2_caching_info *ref_ci, 6111 struct buffer_head *ref_root_bh, 6112 struct ocfs2_cached_dealloc_ctxt *dealloc) 6113 { 6114 int ret = 0; 6115 struct ocfs2_xattr_block *xb = 6116 (struct ocfs2_xattr_block *)blk_bh->b_data; 6117 6118 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6119 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6120 struct ocfs2_xattr_value_buf vb = { 6121 .vb_bh = blk_bh, 6122 .vb_access = ocfs2_journal_access_xb, 6123 }; 6124 6125 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6126 ref_ci, ref_root_bh, 6127 dealloc); 6128 } else { 6129 struct ocfs2_xattr_tree_value_refcount_para para = { 6130 .ref_ci = ref_ci, 6131 .ref_root_bh = ref_root_bh, 6132 .dealloc = dealloc, 6133 }; 6134 6135 ret = 
ocfs2_iterate_xattr_index_block(inode, blk_bh, 6136 ocfs2_refcount_xattr_tree_rec, 6137 ¶); 6138 } 6139 6140 return ret; 6141 } 6142 6143 int ocfs2_xattr_attach_refcount_tree(struct inode *inode, 6144 struct buffer_head *fe_bh, 6145 struct ocfs2_caching_info *ref_ci, 6146 struct buffer_head *ref_root_bh, 6147 struct ocfs2_cached_dealloc_ctxt *dealloc) 6148 { 6149 int ret = 0; 6150 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6151 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6152 struct buffer_head *blk_bh = NULL; 6153 6154 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 6155 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, 6156 ref_ci, ref_root_bh, 6157 dealloc); 6158 if (ret) { 6159 mlog_errno(ret); 6160 goto out; 6161 } 6162 } 6163 6164 if (!di->i_xattr_loc) 6165 goto out; 6166 6167 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 6168 &blk_bh); 6169 if (ret < 0) { 6170 mlog_errno(ret); 6171 goto out; 6172 } 6173 6174 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, 6175 ref_root_bh, dealloc); 6176 if (ret) 6177 mlog_errno(ret); 6178 6179 brelse(blk_bh); 6180 out: 6181 6182 return ret; 6183 } 6184 6185 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); 6186 /* 6187 * Store the information we need in xattr reflink. 6188 * old_bh and new_bh are inode bh for the old and new inode. 6189 */ 6190 struct ocfs2_xattr_reflink { 6191 struct inode *old_inode; 6192 struct inode *new_inode; 6193 struct buffer_head *old_bh; 6194 struct buffer_head *new_bh; 6195 struct ocfs2_caching_info *ref_ci; 6196 struct buffer_head *ref_root_bh; 6197 struct ocfs2_cached_dealloc_ctxt *dealloc; 6198 should_xattr_reflinked *xattr_reflinked; 6199 }; 6200 6201 /* 6202 * Given a xattr header and xe offset, 6203 * return the proper xv and the corresponding bh. 6204 * xattr in inode, block and xattr tree have different implementaions. 
6205 */ 6206 typedef int (get_xattr_value_root)(struct super_block *sb, 6207 struct buffer_head *bh, 6208 struct ocfs2_xattr_header *xh, 6209 int offset, 6210 struct ocfs2_xattr_value_root **xv, 6211 struct buffer_head **ret_bh, 6212 void *para); 6213 6214 /* 6215 * Calculate all the xattr value root metadata stored in this xattr header and 6216 * credits we need if we create them from the scratch. 6217 * We use get_xattr_value_root so that all types of xattr container can use it. 6218 */ 6219 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, 6220 struct buffer_head *bh, 6221 struct ocfs2_xattr_header *xh, 6222 int *metas, int *credits, 6223 int *num_recs, 6224 get_xattr_value_root *func, 6225 void *para) 6226 { 6227 int i, ret = 0; 6228 struct ocfs2_xattr_value_root *xv; 6229 struct ocfs2_xattr_entry *xe; 6230 6231 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6232 xe = &xh->xh_entries[i]; 6233 if (ocfs2_xattr_is_local(xe)) 6234 continue; 6235 6236 ret = func(sb, bh, xh, i, &xv, NULL, para); 6237 if (ret) { 6238 mlog_errno(ret); 6239 break; 6240 } 6241 6242 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * 6243 le16_to_cpu(xv->xr_list.l_next_free_rec); 6244 6245 *credits += ocfs2_calc_extend_credits(sb, 6246 &def_xv.xv.xr_list); 6247 6248 /* 6249 * If the value is a tree with depth > 1, We don't go deep 6250 * to the extent block, so just calculate a maximum record num. 6251 */ 6252 if (!xv->xr_list.l_tree_depth) 6253 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); 6254 else 6255 *num_recs += ocfs2_clusters_for_bytes(sb, 6256 XATTR_SIZE_MAX); 6257 } 6258 6259 return ret; 6260 } 6261 6262 /* Used by xattr inode and block to return the right xv and buffer_head. 
*/ 6263 static int ocfs2_get_xattr_value_root(struct super_block *sb, 6264 struct buffer_head *bh, 6265 struct ocfs2_xattr_header *xh, 6266 int offset, 6267 struct ocfs2_xattr_value_root **xv, 6268 struct buffer_head **ret_bh, 6269 void *para) 6270 { 6271 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6272 6273 *xv = (struct ocfs2_xattr_value_root *)((void *)xh + 6274 le16_to_cpu(xe->xe_name_offset) + 6275 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6276 6277 if (ret_bh) 6278 *ret_bh = bh; 6279 6280 return 0; 6281 } 6282 6283 /* 6284 * Lock the meta_ac and caculate how much credits we need for reflink xattrs. 6285 * It is only used for inline xattr and xattr block. 6286 */ 6287 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, 6288 struct ocfs2_xattr_header *xh, 6289 struct buffer_head *ref_root_bh, 6290 int *credits, 6291 struct ocfs2_alloc_context **meta_ac) 6292 { 6293 int ret, meta_add = 0, num_recs = 0; 6294 struct ocfs2_refcount_block *rb = 6295 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 6296 6297 *credits = 0; 6298 6299 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh, 6300 &meta_add, credits, &num_recs, 6301 ocfs2_get_xattr_value_root, 6302 NULL); 6303 if (ret) { 6304 mlog_errno(ret); 6305 goto out; 6306 } 6307 6308 /* 6309 * We need to add/modify num_recs in refcount tree, so just calculate 6310 * an approximate number we need for refcount tree change. 6311 * Sometimes we need to split the tree, and after split, half recs 6312 * will be moved to the new block, and a new block can only provide 6313 * half number of recs. So we multiple new blocks by 2. 
6314 */ 6315 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6316 meta_add += num_recs; 6317 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6318 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6319 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6320 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6321 else 6322 *credits += 1; 6323 6324 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); 6325 if (ret) 6326 mlog_errno(ret); 6327 6328 out: 6329 return ret; 6330 } 6331 6332 /* 6333 * Given a xattr header, reflink all the xattrs in this container. 6334 * It can be used for inode, block and bucket. 6335 * 6336 * NOTE: 6337 * Before we call this function, the caller has memcpy the xattr in 6338 * old_xh to the new_xh. 6339 * 6340 * If args.xattr_reflinked is set, call it to decide whether the xe should 6341 * be reflinked or not. If not, remove it from the new xattr header. 6342 */ 6343 static int ocfs2_reflink_xattr_header(handle_t *handle, 6344 struct ocfs2_xattr_reflink *args, 6345 struct buffer_head *old_bh, 6346 struct ocfs2_xattr_header *xh, 6347 struct buffer_head *new_bh, 6348 struct ocfs2_xattr_header *new_xh, 6349 struct ocfs2_xattr_value_buf *vb, 6350 struct ocfs2_alloc_context *meta_ac, 6351 get_xattr_value_root *func, 6352 void *para) 6353 { 6354 int ret = 0, i, j; 6355 struct super_block *sb = args->old_inode->i_sb; 6356 struct buffer_head *value_bh; 6357 struct ocfs2_xattr_entry *xe, *last; 6358 struct ocfs2_xattr_value_root *xv, *new_xv; 6359 struct ocfs2_extent_tree data_et; 6360 u32 clusters, cpos, p_cluster, num_clusters; 6361 unsigned int ext_flags = 0; 6362 6363 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr, 6364 le16_to_cpu(xh->xh_count)); 6365 6366 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; 6367 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6368 xe = &xh->xh_entries[i]; 6369 6370 if (args->xattr_reflinked && 
!args->xattr_reflinked(xe)) { 6371 xe = &new_xh->xh_entries[j]; 6372 6373 le16_add_cpu(&new_xh->xh_count, -1); 6374 if (new_xh->xh_count) { 6375 memmove(xe, xe + 1, 6376 (void *)last - (void *)xe); 6377 memset(last, 0, 6378 sizeof(struct ocfs2_xattr_entry)); 6379 } 6380 6381 /* 6382 * We don't want j to increase in the next round since 6383 * it is already moved ahead. 6384 */ 6385 j--; 6386 continue; 6387 } 6388 6389 if (ocfs2_xattr_is_local(xe)) 6390 continue; 6391 6392 ret = func(sb, old_bh, xh, i, &xv, NULL, para); 6393 if (ret) { 6394 mlog_errno(ret); 6395 break; 6396 } 6397 6398 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); 6399 if (ret) { 6400 mlog_errno(ret); 6401 break; 6402 } 6403 6404 /* 6405 * For the xattr which has l_tree_depth = 0, all the extent 6406 * recs have already be copied to the new xh with the 6407 * propriate OCFS2_EXT_REFCOUNTED flag we just need to 6408 * increase the refount count int the refcount tree. 6409 * 6410 * For the xattr which has l_tree_depth > 0, we need 6411 * to initialize it to the empty default value root, 6412 * and then insert the extents one by one. 
6413 */ 6414 if (xv->xr_list.l_tree_depth) { 6415 memcpy(new_xv, &def_xv, sizeof(def_xv)); 6416 vb->vb_xv = new_xv; 6417 vb->vb_bh = value_bh; 6418 ocfs2_init_xattr_value_extent_tree(&data_et, 6419 INODE_CACHE(args->new_inode), vb); 6420 } 6421 6422 clusters = le32_to_cpu(xv->xr_clusters); 6423 cpos = 0; 6424 while (cpos < clusters) { 6425 ret = ocfs2_xattr_get_clusters(args->old_inode, 6426 cpos, 6427 &p_cluster, 6428 &num_clusters, 6429 &xv->xr_list, 6430 &ext_flags); 6431 if (ret) { 6432 mlog_errno(ret); 6433 goto out; 6434 } 6435 6436 BUG_ON(!p_cluster); 6437 6438 if (xv->xr_list.l_tree_depth) { 6439 ret = ocfs2_insert_extent(handle, 6440 &data_et, cpos, 6441 ocfs2_clusters_to_blocks( 6442 args->old_inode->i_sb, 6443 p_cluster), 6444 num_clusters, ext_flags, 6445 meta_ac); 6446 if (ret) { 6447 mlog_errno(ret); 6448 goto out; 6449 } 6450 } 6451 6452 ret = ocfs2_increase_refcount(handle, args->ref_ci, 6453 args->ref_root_bh, 6454 p_cluster, num_clusters, 6455 meta_ac, args->dealloc); 6456 if (ret) { 6457 mlog_errno(ret); 6458 goto out; 6459 } 6460 6461 cpos += num_clusters; 6462 } 6463 } 6464 6465 out: 6466 return ret; 6467 } 6468 6469 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) 6470 { 6471 int ret = 0, credits = 0; 6472 handle_t *handle; 6473 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); 6474 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; 6475 int inline_size = le16_to_cpu(di->i_xattr_inline_size); 6476 int header_off = osb->sb->s_blocksize - inline_size; 6477 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *) 6478 (args->old_bh->b_data + header_off); 6479 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *) 6480 (args->new_bh->b_data + header_off); 6481 struct ocfs2_alloc_context *meta_ac = NULL; 6482 struct ocfs2_inode_info *new_oi; 6483 struct ocfs2_dinode *new_di; 6484 struct ocfs2_xattr_value_buf vb = { 6485 .vb_bh = args->new_bh, 6486 .vb_access = 
ocfs2_journal_access_di, 6487 }; 6488 6489 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6490 &credits, &meta_ac); 6491 if (ret) { 6492 mlog_errno(ret); 6493 goto out; 6494 } 6495 6496 handle = ocfs2_start_trans(osb, credits); 6497 if (IS_ERR(handle)) { 6498 ret = PTR_ERR(handle); 6499 mlog_errno(ret); 6500 goto out; 6501 } 6502 6503 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), 6504 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6505 if (ret) { 6506 mlog_errno(ret); 6507 goto out_commit; 6508 } 6509 6510 memcpy(args->new_bh->b_data + header_off, 6511 args->old_bh->b_data + header_off, inline_size); 6512 6513 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6514 new_di->i_xattr_inline_size = cpu_to_le16(inline_size); 6515 6516 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, 6517 args->new_bh, new_xh, &vb, meta_ac, 6518 ocfs2_get_xattr_value_root, NULL); 6519 if (ret) { 6520 mlog_errno(ret); 6521 goto out_commit; 6522 } 6523 6524 new_oi = OCFS2_I(args->new_inode); 6525 /* 6526 * Adjust extent record count to reserve space for extended attribute. 6527 * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). 
6528 */ 6529 if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && 6530 !(ocfs2_inode_is_fast_symlink(args->new_inode))) { 6531 struct ocfs2_extent_list *el = &new_di->id2.i_list; 6532 le16_add_cpu(&el->l_count, -(inline_size / 6533 sizeof(struct ocfs2_extent_rec))); 6534 } 6535 spin_lock(&new_oi->ip_lock); 6536 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; 6537 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6538 spin_unlock(&new_oi->ip_lock); 6539 6540 ocfs2_journal_dirty(handle, args->new_bh); 6541 6542 out_commit: 6543 ocfs2_commit_trans(osb, handle); 6544 6545 out: 6546 if (meta_ac) 6547 ocfs2_free_alloc_context(meta_ac); 6548 return ret; 6549 } 6550 6551 static int ocfs2_create_empty_xattr_block(struct inode *inode, 6552 struct buffer_head *fe_bh, 6553 struct buffer_head **ret_bh, 6554 int indexed) 6555 { 6556 int ret; 6557 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6558 struct ocfs2_xattr_set_ctxt ctxt; 6559 6560 memset(&ctxt, 0, sizeof(ctxt)); 6561 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); 6562 if (ret < 0) { 6563 mlog_errno(ret); 6564 return ret; 6565 } 6566 6567 ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); 6568 if (IS_ERR(ctxt.handle)) { 6569 ret = PTR_ERR(ctxt.handle); 6570 mlog_errno(ret); 6571 goto out; 6572 } 6573 6574 trace_ocfs2_create_empty_xattr_block( 6575 (unsigned long long)fe_bh->b_blocknr, indexed); 6576 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6577 ret_bh); 6578 if (ret) 6579 mlog_errno(ret); 6580 6581 ocfs2_commit_trans(osb, ctxt.handle); 6582 out: 6583 ocfs2_free_alloc_context(ctxt.meta_ac); 6584 return ret; 6585 } 6586 6587 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args, 6588 struct buffer_head *blk_bh, 6589 struct buffer_head *new_blk_bh) 6590 { 6591 int ret = 0, credits = 0; 6592 handle_t *handle; 6593 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode); 6594 struct ocfs2_dinode *new_di; 
6595 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb); 6596 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 6597 struct ocfs2_xattr_block *xb = 6598 (struct ocfs2_xattr_block *)blk_bh->b_data; 6599 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header; 6600 struct ocfs2_xattr_block *new_xb = 6601 (struct ocfs2_xattr_block *)new_blk_bh->b_data; 6602 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header; 6603 struct ocfs2_alloc_context *meta_ac; 6604 struct ocfs2_xattr_value_buf vb = { 6605 .vb_bh = new_blk_bh, 6606 .vb_access = ocfs2_journal_access_xb, 6607 }; 6608 6609 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6610 &credits, &meta_ac); 6611 if (ret) { 6612 mlog_errno(ret); 6613 return ret; 6614 } 6615 6616 /* One more credits in case we need to add xattr flags in new inode. */ 6617 handle = ocfs2_start_trans(osb, credits + 1); 6618 if (IS_ERR(handle)) { 6619 ret = PTR_ERR(handle); 6620 mlog_errno(ret); 6621 goto out; 6622 } 6623 6624 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6625 ret = ocfs2_journal_access_di(handle, 6626 INODE_CACHE(args->new_inode), 6627 args->new_bh, 6628 OCFS2_JOURNAL_ACCESS_WRITE); 6629 if (ret) { 6630 mlog_errno(ret); 6631 goto out_commit; 6632 } 6633 } 6634 6635 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode), 6636 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6637 if (ret) { 6638 mlog_errno(ret); 6639 goto out_commit; 6640 } 6641 6642 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off, 6643 osb->sb->s_blocksize - header_off); 6644 6645 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh, 6646 new_blk_bh, new_xh, &vb, meta_ac, 6647 ocfs2_get_xattr_value_root, NULL); 6648 if (ret) { 6649 mlog_errno(ret); 6650 goto out_commit; 6651 } 6652 6653 ocfs2_journal_dirty(handle, new_blk_bh); 6654 6655 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6656 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6657 
spin_lock(&new_oi->ip_lock); 6658 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 6659 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6660 spin_unlock(&new_oi->ip_lock); 6661 6662 ocfs2_journal_dirty(handle, args->new_bh); 6663 } 6664 6665 out_commit: 6666 ocfs2_commit_trans(osb, handle); 6667 6668 out: 6669 ocfs2_free_alloc_context(meta_ac); 6670 return ret; 6671 } 6672 6673 struct ocfs2_reflink_xattr_tree_args { 6674 struct ocfs2_xattr_reflink *reflink; 6675 struct buffer_head *old_blk_bh; 6676 struct buffer_head *new_blk_bh; 6677 struct ocfs2_xattr_bucket *old_bucket; 6678 struct ocfs2_xattr_bucket *new_bucket; 6679 }; 6680 6681 /* 6682 * NOTE: 6683 * We have to handle the case that both old bucket and new bucket 6684 * will call this function to get the right ret_bh. 6685 * So The caller must give us the right bh. 6686 */ 6687 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb, 6688 struct buffer_head *bh, 6689 struct ocfs2_xattr_header *xh, 6690 int offset, 6691 struct ocfs2_xattr_value_root **xv, 6692 struct buffer_head **ret_bh, 6693 void *para) 6694 { 6695 struct ocfs2_reflink_xattr_tree_args *args = 6696 (struct ocfs2_reflink_xattr_tree_args *)para; 6697 struct ocfs2_xattr_bucket *bucket; 6698 6699 if (bh == args->old_bucket->bu_bhs[0]) 6700 bucket = args->old_bucket; 6701 else 6702 bucket = args->new_bucket; 6703 6704 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6705 xv, ret_bh); 6706 } 6707 6708 struct ocfs2_value_tree_metas { 6709 int num_metas; 6710 int credits; 6711 int num_recs; 6712 }; 6713 6714 static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb, 6715 struct buffer_head *bh, 6716 struct ocfs2_xattr_header *xh, 6717 int offset, 6718 struct ocfs2_xattr_value_root **xv, 6719 struct buffer_head **ret_bh, 6720 void *para) 6721 { 6722 struct ocfs2_xattr_bucket *bucket = 6723 (struct ocfs2_xattr_bucket *)para; 6724 6725 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6726 xv, 
ret_bh); 6727 } 6728 6729 static int ocfs2_calc_value_tree_metas(struct inode *inode, 6730 struct ocfs2_xattr_bucket *bucket, 6731 void *para) 6732 { 6733 struct ocfs2_value_tree_metas *metas = 6734 (struct ocfs2_value_tree_metas *)para; 6735 struct ocfs2_xattr_header *xh = 6736 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6737 6738 /* Add the credits for this bucket first. */ 6739 metas->credits += bucket->bu_blocks; 6740 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0], 6741 xh, &metas->num_metas, 6742 &metas->credits, &metas->num_recs, 6743 ocfs2_value_tree_metas_in_bucket, 6744 bucket); 6745 } 6746 6747 /* 6748 * Given a xattr extent rec starting from blkno and having len clusters, 6749 * iterate all the buckets calculate how much metadata we need for reflinking 6750 * all the ocfs2_xattr_value_root and lock the allocators accordingly. 6751 */ 6752 static int ocfs2_lock_reflink_xattr_rec_allocators( 6753 struct ocfs2_reflink_xattr_tree_args *args, 6754 struct ocfs2_extent_tree *xt_et, 6755 u64 blkno, u32 len, int *credits, 6756 struct ocfs2_alloc_context **meta_ac, 6757 struct ocfs2_alloc_context **data_ac) 6758 { 6759 int ret, num_free_extents; 6760 struct ocfs2_value_tree_metas metas; 6761 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb); 6762 struct ocfs2_refcount_block *rb; 6763 6764 memset(&metas, 0, sizeof(metas)); 6765 6766 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len, 6767 ocfs2_calc_value_tree_metas, &metas); 6768 if (ret) { 6769 mlog_errno(ret); 6770 goto out; 6771 } 6772 6773 *credits = metas.credits; 6774 6775 /* 6776 * Calculate we need for refcount tree change. 6777 * 6778 * We need to add/modify num_recs in refcount tree, so just calculate 6779 * an approximate number we need for refcount tree change. 6780 * Sometimes we need to split the tree, and after split, half recs 6781 * will be moved to the new block, and a new block can only provide 6782 * half number of recs. 
So we multiple new blocks by 2. 6783 * In the end, we have to add credits for modifying the already 6784 * existed refcount block. 6785 */ 6786 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data; 6787 metas.num_recs = 6788 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) / 6789 ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6790 metas.num_metas += metas.num_recs; 6791 *credits += metas.num_recs + 6792 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6793 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6794 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6795 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6796 else 6797 *credits += 1; 6798 6799 /* count in the xattr tree change. */ 6800 num_free_extents = ocfs2_num_free_extents(osb, xt_et); 6801 if (num_free_extents < 0) { 6802 ret = num_free_extents; 6803 mlog_errno(ret); 6804 goto out; 6805 } 6806 6807 if (num_free_extents < len) 6808 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); 6809 6810 *credits += ocfs2_calc_extend_credits(osb->sb, 6811 xt_et->et_root_el); 6812 6813 if (metas.num_metas) { 6814 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, 6815 meta_ac); 6816 if (ret) { 6817 mlog_errno(ret); 6818 goto out; 6819 } 6820 } 6821 6822 if (len) { 6823 ret = ocfs2_reserve_clusters(osb, len, data_ac); 6824 if (ret) 6825 mlog_errno(ret); 6826 } 6827 out: 6828 if (ret) { 6829 if (*meta_ac) { 6830 ocfs2_free_alloc_context(*meta_ac); 6831 *meta_ac = NULL; 6832 } 6833 } 6834 6835 return ret; 6836 } 6837 6838 static int ocfs2_reflink_xattr_bucket(handle_t *handle, 6839 u64 blkno, u64 new_blkno, u32 clusters, 6840 u32 *cpos, int num_buckets, 6841 struct ocfs2_alloc_context *meta_ac, 6842 struct ocfs2_alloc_context *data_ac, 6843 struct ocfs2_reflink_xattr_tree_args *args) 6844 { 6845 int i, j, ret = 0; 6846 struct super_block *sb = args->reflink->old_inode->i_sb; 6847 int bpb = args->old_bucket->bu_blocks; 6848 struct ocfs2_xattr_value_buf vb = { 6849 
.vb_access = ocfs2_journal_access, 6850 }; 6851 6852 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) { 6853 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6854 if (ret) { 6855 mlog_errno(ret); 6856 break; 6857 } 6858 6859 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1); 6860 if (ret) { 6861 mlog_errno(ret); 6862 break; 6863 } 6864 6865 ret = ocfs2_xattr_bucket_journal_access(handle, 6866 args->new_bucket, 6867 OCFS2_JOURNAL_ACCESS_CREATE); 6868 if (ret) { 6869 mlog_errno(ret); 6870 break; 6871 } 6872 6873 for (j = 0; j < bpb; j++) 6874 memcpy(bucket_block(args->new_bucket, j), 6875 bucket_block(args->old_bucket, j), 6876 sb->s_blocksize); 6877 6878 /* 6879 * Record the start cpos so that we can use it to initialize 6880 * our xattr tree we also set the xh_num_bucket for the new 6881 * bucket. 6882 */ 6883 if (i == 0) { 6884 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> 6885 xh_entries[0].xe_name_hash); 6886 bucket_xh(args->new_bucket)->xh_num_buckets = 6887 cpu_to_le16(num_buckets); 6888 } 6889 6890 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6891 6892 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6893 args->old_bucket->bu_bhs[0], 6894 bucket_xh(args->old_bucket), 6895 args->new_bucket->bu_bhs[0], 6896 bucket_xh(args->new_bucket), 6897 &vb, meta_ac, 6898 ocfs2_get_reflink_xattr_value_root, 6899 args); 6900 if (ret) { 6901 mlog_errno(ret); 6902 break; 6903 } 6904 6905 /* 6906 * Re-access and dirty the bucket to calculate metaecc. 6907 * Because we may extend the transaction in reflink_xattr_header 6908 * which will let the already accessed block gone. 
6909 */ 6910 ret = ocfs2_xattr_bucket_journal_access(handle, 6911 args->new_bucket, 6912 OCFS2_JOURNAL_ACCESS_WRITE); 6913 if (ret) { 6914 mlog_errno(ret); 6915 break; 6916 } 6917 6918 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6919 6920 ocfs2_xattr_bucket_relse(args->old_bucket); 6921 ocfs2_xattr_bucket_relse(args->new_bucket); 6922 } 6923 6924 ocfs2_xattr_bucket_relse(args->old_bucket); 6925 ocfs2_xattr_bucket_relse(args->new_bucket); 6926 return ret; 6927 } 6928 6929 static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6930 struct inode *inode, 6931 struct ocfs2_reflink_xattr_tree_args *args, 6932 struct ocfs2_extent_tree *et, 6933 struct ocfs2_alloc_context *meta_ac, 6934 struct ocfs2_alloc_context *data_ac, 6935 u64 blkno, u32 cpos, u32 len) 6936 { 6937 int ret, first_inserted = 0; 6938 u32 p_cluster, num_clusters, reflink_cpos = 0; 6939 u64 new_blkno; 6940 unsigned int num_buckets, reflink_buckets; 6941 unsigned int bpc = 6942 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 6943 6944 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6945 if (ret) { 6946 mlog_errno(ret); 6947 goto out; 6948 } 6949 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); 6950 ocfs2_xattr_bucket_relse(args->old_bucket); 6951 6952 while (len && num_buckets) { 6953 ret = ocfs2_claim_clusters(handle, data_ac, 6954 1, &p_cluster, &num_clusters); 6955 if (ret) { 6956 mlog_errno(ret); 6957 goto out; 6958 } 6959 6960 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 6961 reflink_buckets = min(num_buckets, bpc * num_clusters); 6962 6963 ret = ocfs2_reflink_xattr_bucket(handle, blkno, 6964 new_blkno, num_clusters, 6965 &reflink_cpos, reflink_buckets, 6966 meta_ac, data_ac, args); 6967 if (ret) { 6968 mlog_errno(ret); 6969 goto out; 6970 } 6971 6972 /* 6973 * For the 1st allocated cluster, we make it use the same cpos 6974 * so that the xattr tree looks the same as the original one 6975 * in the most case. 
6976 */ 6977 if (!first_inserted) { 6978 reflink_cpos = cpos; 6979 first_inserted = 1; 6980 } 6981 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 6982 num_clusters, 0, meta_ac); 6983 if (ret) 6984 mlog_errno(ret); 6985 6986 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno, 6987 num_clusters, reflink_cpos); 6988 6989 len -= num_clusters; 6990 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6991 num_buckets -= reflink_buckets; 6992 } 6993 out: 6994 return ret; 6995 } 6996 6997 /* 6998 * Create the same xattr extent record in the new inode's xattr tree. 6999 */ 7000 static int ocfs2_reflink_xattr_rec(struct inode *inode, 7001 struct buffer_head *root_bh, 7002 u64 blkno, 7003 u32 cpos, 7004 u32 len, 7005 void *para) 7006 { 7007 int ret, credits = 0; 7008 handle_t *handle; 7009 struct ocfs2_reflink_xattr_tree_args *args = 7010 (struct ocfs2_reflink_xattr_tree_args *)para; 7011 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7012 struct ocfs2_alloc_context *meta_ac = NULL; 7013 struct ocfs2_alloc_context *data_ac = NULL; 7014 struct ocfs2_extent_tree et; 7015 7016 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len); 7017 7018 ocfs2_init_xattr_tree_extent_tree(&et, 7019 INODE_CACHE(args->reflink->new_inode), 7020 args->new_blk_bh); 7021 7022 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 7023 len, &credits, 7024 &meta_ac, &data_ac); 7025 if (ret) { 7026 mlog_errno(ret); 7027 goto out; 7028 } 7029 7030 handle = ocfs2_start_trans(osb, credits); 7031 if (IS_ERR(handle)) { 7032 ret = PTR_ERR(handle); 7033 mlog_errno(ret); 7034 goto out; 7035 } 7036 7037 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7038 meta_ac, data_ac, 7039 blkno, cpos, len); 7040 if (ret) 7041 mlog_errno(ret); 7042 7043 ocfs2_commit_trans(osb, handle); 7044 7045 out: 7046 if (meta_ac) 7047 ocfs2_free_alloc_context(meta_ac); 7048 if (data_ac) 7049 ocfs2_free_alloc_context(data_ac); 7050 return ret; 7051 } 7052 7053 /* 
7054 * Create reflinked xattr buckets. 7055 * We will add bucket one by one, and refcount all the xattrs in the bucket 7056 * if they are stored outside. 7057 */ 7058 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7059 struct buffer_head *blk_bh, 7060 struct buffer_head *new_blk_bh) 7061 { 7062 int ret; 7063 struct ocfs2_reflink_xattr_tree_args para; 7064 7065 memset(¶, 0, sizeof(para)); 7066 para.reflink = args; 7067 para.old_blk_bh = blk_bh; 7068 para.new_blk_bh = new_blk_bh; 7069 7070 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7071 if (!para.old_bucket) { 7072 mlog_errno(-ENOMEM); 7073 return -ENOMEM; 7074 } 7075 7076 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7077 if (!para.new_bucket) { 7078 ret = -ENOMEM; 7079 mlog_errno(ret); 7080 goto out; 7081 } 7082 7083 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7084 ocfs2_reflink_xattr_rec, 7085 ¶); 7086 if (ret) 7087 mlog_errno(ret); 7088 7089 out: 7090 ocfs2_xattr_bucket_free(para.old_bucket); 7091 ocfs2_xattr_bucket_free(para.new_bucket); 7092 return ret; 7093 } 7094 7095 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7096 struct buffer_head *blk_bh) 7097 { 7098 int ret, indexed = 0; 7099 struct buffer_head *new_blk_bh = NULL; 7100 struct ocfs2_xattr_block *xb = 7101 (struct ocfs2_xattr_block *)blk_bh->b_data; 7102 7103 7104 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7105 indexed = 1; 7106 7107 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7108 &new_blk_bh, indexed); 7109 if (ret) { 7110 mlog_errno(ret); 7111 goto out; 7112 } 7113 7114 if (!indexed) 7115 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7116 else 7117 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7118 if (ret) 7119 mlog_errno(ret); 7120 7121 out: 7122 brelse(new_blk_bh); 7123 return ret; 7124 } 7125 7126 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7127 { 7128 int type = 
ocfs2_xattr_get_type(xe); 7129 7130 return type != OCFS2_XATTR_INDEX_SECURITY && 7131 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && 7132 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; 7133 } 7134 7135 int ocfs2_reflink_xattrs(struct inode *old_inode, 7136 struct buffer_head *old_bh, 7137 struct inode *new_inode, 7138 struct buffer_head *new_bh, 7139 bool preserve_security) 7140 { 7141 int ret; 7142 struct ocfs2_xattr_reflink args; 7143 struct ocfs2_inode_info *oi = OCFS2_I(old_inode); 7144 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; 7145 struct buffer_head *blk_bh = NULL; 7146 struct ocfs2_cached_dealloc_ctxt dealloc; 7147 struct ocfs2_refcount_tree *ref_tree; 7148 struct buffer_head *ref_root_bh = NULL; 7149 7150 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7151 le64_to_cpu(di->i_refcount_loc), 7152 1, &ref_tree, &ref_root_bh); 7153 if (ret) { 7154 mlog_errno(ret); 7155 goto out; 7156 } 7157 7158 ocfs2_init_dealloc_ctxt(&dealloc); 7159 7160 args.old_inode = old_inode; 7161 args.new_inode = new_inode; 7162 args.old_bh = old_bh; 7163 args.new_bh = new_bh; 7164 args.ref_ci = &ref_tree->rf_ci; 7165 args.ref_root_bh = ref_root_bh; 7166 args.dealloc = &dealloc; 7167 if (preserve_security) 7168 args.xattr_reflinked = NULL; 7169 else 7170 args.xattr_reflinked = ocfs2_reflink_xattr_no_security; 7171 7172 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 7173 ret = ocfs2_reflink_xattr_inline(&args); 7174 if (ret) { 7175 mlog_errno(ret); 7176 goto out_unlock; 7177 } 7178 } 7179 7180 if (!di->i_xattr_loc) 7181 goto out_unlock; 7182 7183 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), 7184 &blk_bh); 7185 if (ret < 0) { 7186 mlog_errno(ret); 7187 goto out_unlock; 7188 } 7189 7190 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); 7191 if (ret) 7192 mlog_errno(ret); 7193 7194 brelse(blk_bh); 7195 7196 out_unlock: 7197 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7198 ref_tree, 1); 7199 brelse(ref_root_bh); 7200 7201 
if (ocfs2_dealloc_has_cluster(&dealloc)) { 7202 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); 7203 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); 7204 } 7205 7206 out: 7207 return ret; 7208 } 7209 7210 /* 7211 * Initialize security and acl for a already created inode. 7212 * Used for reflink a non-preserve-security file. 7213 * 7214 * It uses common api like ocfs2_xattr_set, so the caller 7215 * must not hold any lock expect i_mutex. 7216 */ 7217 int ocfs2_init_security_and_acl(struct inode *dir, 7218 struct inode *inode, 7219 const struct qstr *qstr) 7220 { 7221 int ret = 0; 7222 struct buffer_head *dir_bh = NULL; 7223 7224 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7225 if (ret) { 7226 mlog_errno(ret); 7227 goto leave; 7228 } 7229 7230 ret = ocfs2_inode_lock(dir, &dir_bh, 0); 7231 if (ret) { 7232 mlog_errno(ret); 7233 goto leave; 7234 } 7235 ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); 7236 if (ret) 7237 mlog_errno(ret); 7238 7239 ocfs2_inode_unlock(dir, 0); 7240 brelse(dir_bh); 7241 leave: 7242 return ret; 7243 } 7244 7245 /* 7246 * 'security' attributes support 7247 */ 7248 static int ocfs2_xattr_security_get(const struct xattr_handler *handler, 7249 struct dentry *unused, struct inode *inode, 7250 const char *name, void *buffer, size_t size) 7251 { 7252 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, 7253 name, buffer, size); 7254 } 7255 7256 static int ocfs2_xattr_security_set(const struct xattr_handler *handler, 7257 struct dentry *unused, struct inode *inode, 7258 const char *name, const void *value, 7259 size_t size, int flags) 7260 { 7261 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7262 name, value, size, flags); 7263 } 7264 7265 static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 7266 void *fs_info) 7267 { 7268 const struct xattr *xattr; 7269 int err = 0; 7270 7271 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 7272 err = 
ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7273 xattr->name, xattr->value, 7274 xattr->value_len, XATTR_CREATE); 7275 if (err) 7276 break; 7277 } 7278 return err; 7279 } 7280 7281 int ocfs2_init_security_get(struct inode *inode, 7282 struct inode *dir, 7283 const struct qstr *qstr, 7284 struct ocfs2_security_xattr_info *si) 7285 { 7286 /* check whether ocfs2 support feature xattr */ 7287 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7288 return -EOPNOTSUPP; 7289 if (si) 7290 return security_old_inode_init_security(inode, dir, qstr, 7291 &si->name, &si->value, 7292 &si->value_len); 7293 7294 return security_inode_init_security(inode, dir, qstr, 7295 &ocfs2_initxattrs, NULL); 7296 } 7297 7298 int ocfs2_init_security_set(handle_t *handle, 7299 struct inode *inode, 7300 struct buffer_head *di_bh, 7301 struct ocfs2_security_xattr_info *si, 7302 struct ocfs2_alloc_context *xattr_ac, 7303 struct ocfs2_alloc_context *data_ac) 7304 { 7305 return ocfs2_xattr_set_handle(handle, inode, di_bh, 7306 OCFS2_XATTR_INDEX_SECURITY, 7307 si->name, si->value, si->value_len, 0, 7308 xattr_ac, data_ac); 7309 } 7310 7311 const struct xattr_handler ocfs2_xattr_security_handler = { 7312 .prefix = XATTR_SECURITY_PREFIX, 7313 .get = ocfs2_xattr_security_get, 7314 .set = ocfs2_xattr_security_set, 7315 }; 7316 7317 /* 7318 * 'trusted' attributes support 7319 */ 7320 static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, 7321 struct dentry *unused, struct inode *inode, 7322 const char *name, void *buffer, size_t size) 7323 { 7324 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, 7325 name, buffer, size); 7326 } 7327 7328 static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler, 7329 struct dentry *unused, struct inode *inode, 7330 const char *name, const void *value, 7331 size_t size, int flags) 7332 { 7333 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, 7334 name, value, size, flags); 7335 } 7336 7337 const struct xattr_handler 
ocfs2_xattr_trusted_handler = { 7338 .prefix = XATTR_TRUSTED_PREFIX, 7339 .get = ocfs2_xattr_trusted_get, 7340 .set = ocfs2_xattr_trusted_set, 7341 }; 7342 7343 /* 7344 * 'user' attributes support 7345 */ 7346 static int ocfs2_xattr_user_get(const struct xattr_handler *handler, 7347 struct dentry *unused, struct inode *inode, 7348 const char *name, void *buffer, size_t size) 7349 { 7350 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7351 7352 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7353 return -EOPNOTSUPP; 7354 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, 7355 buffer, size); 7356 } 7357 7358 static int ocfs2_xattr_user_set(const struct xattr_handler *handler, 7359 struct dentry *unused, struct inode *inode, 7360 const char *name, const void *value, 7361 size_t size, int flags) 7362 { 7363 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7364 7365 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7366 return -EOPNOTSUPP; 7367 7368 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, 7369 name, value, size, flags); 7370 } 7371 7372 const struct xattr_handler ocfs2_xattr_user_handler = { 7373 .prefix = XATTR_USER_PREFIX, 7374 .get = ocfs2_xattr_user_get, 7375 .set = ocfs2_xattr_user_set, 7376 }; 7377