1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * xattr.c 5 * 6 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 7 * 8 * CREDITS: 9 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public 14 * License version 2 as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 */ 21 22 #include <linux/capability.h> 23 #include <linux/fs.h> 24 #include <linux/types.h> 25 #include <linux/slab.h> 26 #include <linux/highmem.h> 27 #include <linux/pagemap.h> 28 #include <linux/uio.h> 29 #include <linux/sched.h> 30 #include <linux/splice.h> 31 #include <linux/mount.h> 32 #include <linux/writeback.h> 33 #include <linux/falloc.h> 34 #include <linux/sort.h> 35 #include <linux/init.h> 36 #include <linux/module.h> 37 #include <linux/string.h> 38 #include <linux/security.h> 39 40 #include <cluster/masklog.h> 41 42 #include "ocfs2.h" 43 #include "alloc.h" 44 #include "blockcheck.h" 45 #include "dlmglue.h" 46 #include "file.h" 47 #include "symlink.h" 48 #include "sysfile.h" 49 #include "inode.h" 50 #include "journal.h" 51 #include "ocfs2_fs.h" 52 #include "suballoc.h" 53 #include "uptodate.h" 54 #include "buffer_head_io.h" 55 #include "super.h" 56 #include "xattr.h" 57 #include "refcounttree.h" 58 #include "acl.h" 59 #include "ocfs2_trace.h" 60 61 struct ocfs2_xattr_def_value_root { 62 struct ocfs2_xattr_value_root xv; 63 struct ocfs2_extent_rec er; 64 }; 65 66 struct ocfs2_xattr_bucket { 67 /* The inode these xattrs are associated with */ 68 struct inode *bu_inode; 69 70 /* The actual buffers that 
make up the bucket */ 71 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 72 73 /* How many blocks make up one bucket for this filesystem */ 74 int bu_blocks; 75 }; 76 77 struct ocfs2_xattr_set_ctxt { 78 handle_t *handle; 79 struct ocfs2_alloc_context *meta_ac; 80 struct ocfs2_alloc_context *data_ac; 81 struct ocfs2_cached_dealloc_ctxt dealloc; 82 int set_abort; 83 }; 84 85 #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 86 #define OCFS2_XATTR_INLINE_SIZE 80 87 #define OCFS2_XATTR_HEADER_GAP 4 88 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 89 - sizeof(struct ocfs2_xattr_header) \ 90 - OCFS2_XATTR_HEADER_GAP) 91 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 92 - sizeof(struct ocfs2_xattr_block) \ 93 - sizeof(struct ocfs2_xattr_header) \ 94 - OCFS2_XATTR_HEADER_GAP) 95 96 static struct ocfs2_xattr_def_value_root def_xv = { 97 .xv.xr_list.l_count = cpu_to_le16(1), 98 }; 99 100 const struct xattr_handler *ocfs2_xattr_handlers[] = { 101 &ocfs2_xattr_user_handler, 102 &posix_acl_access_xattr_handler, 103 &posix_acl_default_xattr_handler, 104 &ocfs2_xattr_trusted_handler, 105 &ocfs2_xattr_security_handler, 106 NULL 107 }; 108 109 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 110 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 111 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 112 = &posix_acl_access_xattr_handler, 113 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 114 = &posix_acl_default_xattr_handler, 115 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 116 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 117 }; 118 119 struct ocfs2_xattr_info { 120 int xi_name_index; 121 const char *xi_name; 122 int xi_name_len; 123 const void *xi_value; 124 size_t xi_value_len; 125 }; 126 127 struct ocfs2_xattr_search { 128 struct buffer_head *inode_bh; 129 /* 130 * xattr_bh point to the block buffer head which has extended attribute 131 * when extended 
attribute in inode, xattr_bh is equal to inode_bh. 132 */ 133 struct buffer_head *xattr_bh; 134 struct ocfs2_xattr_header *header; 135 struct ocfs2_xattr_bucket *bucket; 136 void *base; 137 void *end; 138 struct ocfs2_xattr_entry *here; 139 int not_found; 140 }; 141 142 /* Operations on struct ocfs2_xa_entry */ 143 struct ocfs2_xa_loc; 144 struct ocfs2_xa_loc_operations { 145 /* 146 * Journal functions 147 */ 148 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 149 int type); 150 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 151 152 /* 153 * Return a pointer to the appropriate buffer in loc->xl_storage 154 * at the given offset from loc->xl_header. 155 */ 156 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 157 158 /* Can we reuse the existing entry for the new value? */ 159 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 160 struct ocfs2_xattr_info *xi); 161 162 /* How much space is needed for the new value? */ 163 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 164 struct ocfs2_xattr_info *xi); 165 166 /* 167 * Return the offset of the first name+value pair. This is 168 * the start of our downward-filling free space. 169 */ 170 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 171 172 /* 173 * Remove the name+value at this location. Do whatever is 174 * appropriate with the remaining name+value pairs. 175 */ 176 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 177 178 /* Fill xl_entry with a new entry */ 179 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 180 181 /* Add name+value storage to an entry */ 182 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 183 184 /* 185 * Initialize the value buf's access and bh fields for this entry. 186 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 187 */ 188 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 189 struct ocfs2_xattr_value_buf *vb); 190 }; 191 192 /* 193 * Describes an xattr entry location. 
This is a memory structure 194 * tracking the on-disk structure. 195 */ 196 struct ocfs2_xa_loc { 197 /* This xattr belongs to this inode */ 198 struct inode *xl_inode; 199 200 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 201 struct ocfs2_xattr_header *xl_header; 202 203 /* Bytes from xl_header to the end of the storage */ 204 int xl_size; 205 206 /* 207 * The ocfs2_xattr_entry this location describes. If this is 208 * NULL, this location describes the on-disk structure where it 209 * would have been. 210 */ 211 struct ocfs2_xattr_entry *xl_entry; 212 213 /* 214 * Internal housekeeping 215 */ 216 217 /* Buffer(s) containing this entry */ 218 void *xl_storage; 219 220 /* Operations on the storage backing this location */ 221 const struct ocfs2_xa_loc_operations *xl_ops; 222 }; 223 224 /* 225 * Convenience functions to calculate how much space is needed for a 226 * given name+value pair 227 */ 228 static int namevalue_size(int name_len, uint64_t value_len) 229 { 230 if (value_len > OCFS2_XATTR_INLINE_SIZE) 231 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 232 else 233 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 234 } 235 236 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 237 { 238 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 239 } 240 241 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 242 { 243 u64 value_len = le64_to_cpu(xe->xe_value_size); 244 245 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 246 ocfs2_xattr_is_local(xe)); 247 return namevalue_size(xe->xe_name_len, value_len); 248 } 249 250 251 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 252 struct ocfs2_xattr_header *xh, 253 int index, 254 int *block_off, 255 int *new_offset); 256 257 static int ocfs2_xattr_block_find(struct inode *inode, 258 int name_index, 259 const char *name, 260 struct ocfs2_xattr_search *xs); 261 static int ocfs2_xattr_index_block_find(struct inode *inode, 262 struct 
buffer_head *root_bh, 263 int name_index, 264 const char *name, 265 struct ocfs2_xattr_search *xs); 266 267 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 268 struct buffer_head *blk_bh, 269 char *buffer, 270 size_t buffer_size); 271 272 static int ocfs2_xattr_create_index_block(struct inode *inode, 273 struct ocfs2_xattr_search *xs, 274 struct ocfs2_xattr_set_ctxt *ctxt); 275 276 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 277 struct ocfs2_xattr_info *xi, 278 struct ocfs2_xattr_search *xs, 279 struct ocfs2_xattr_set_ctxt *ctxt); 280 281 typedef int (xattr_tree_rec_func)(struct inode *inode, 282 struct buffer_head *root_bh, 283 u64 blkno, u32 cpos, u32 len, void *para); 284 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 285 struct buffer_head *root_bh, 286 xattr_tree_rec_func *rec_func, 287 void *para); 288 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 289 struct ocfs2_xattr_bucket *bucket, 290 void *para); 291 static int ocfs2_rm_xattr_cluster(struct inode *inode, 292 struct buffer_head *root_bh, 293 u64 blkno, 294 u32 cpos, 295 u32 len, 296 void *para); 297 298 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 299 u64 src_blk, u64 last_blk, u64 to_blk, 300 unsigned int start_bucket, 301 u32 *first_hash); 302 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 303 struct ocfs2_dinode *di, 304 struct ocfs2_xattr_info *xi, 305 struct ocfs2_xattr_search *xis, 306 struct ocfs2_xattr_search *xbs, 307 struct ocfs2_refcount_tree **ref_tree, 308 int *meta_need, 309 int *credits); 310 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 311 struct ocfs2_xattr_bucket *bucket, 312 int offset, 313 struct ocfs2_xattr_value_root **xv, 314 struct buffer_head **bh); 315 316 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 317 { 318 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 319 } 320 321 static inline u16 
ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 322 { 323 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 324 } 325 326 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 327 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 328 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 329 330 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 331 { 332 struct ocfs2_xattr_bucket *bucket; 333 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 334 335 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 336 337 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); 338 if (bucket) { 339 bucket->bu_inode = inode; 340 bucket->bu_blocks = blks; 341 } 342 343 return bucket; 344 } 345 346 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 347 { 348 int i; 349 350 for (i = 0; i < bucket->bu_blocks; i++) { 351 brelse(bucket->bu_bhs[i]); 352 bucket->bu_bhs[i] = NULL; 353 } 354 } 355 356 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 357 { 358 if (bucket) { 359 ocfs2_xattr_bucket_relse(bucket); 360 bucket->bu_inode = NULL; 361 kfree(bucket); 362 } 363 } 364 365 /* 366 * A bucket that has never been written to disk doesn't need to be 367 * read. We just need the buffer_heads. Don't call this for 368 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 369 * them fully. 
370 */ 371 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 372 u64 xb_blkno, int new) 373 { 374 int i, rc = 0; 375 376 for (i = 0; i < bucket->bu_blocks; i++) { 377 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 378 xb_blkno + i); 379 if (!bucket->bu_bhs[i]) { 380 rc = -ENOMEM; 381 mlog_errno(rc); 382 break; 383 } 384 385 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 386 bucket->bu_bhs[i])) { 387 if (new) 388 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 389 bucket->bu_bhs[i]); 390 else { 391 set_buffer_uptodate(bucket->bu_bhs[i]); 392 ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 393 bucket->bu_bhs[i]); 394 } 395 } 396 } 397 398 if (rc) 399 ocfs2_xattr_bucket_relse(bucket); 400 return rc; 401 } 402 403 /* Read the xattr bucket at xb_blkno */ 404 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 405 u64 xb_blkno) 406 { 407 int rc; 408 409 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 410 bucket->bu_blocks, bucket->bu_bhs, 0, 411 NULL); 412 if (!rc) { 413 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 414 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 415 bucket->bu_bhs, 416 bucket->bu_blocks, 417 &bucket_xh(bucket)->xh_check); 418 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 419 if (rc) 420 mlog_errno(rc); 421 } 422 423 if (rc) 424 ocfs2_xattr_bucket_relse(bucket); 425 return rc; 426 } 427 428 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 429 struct ocfs2_xattr_bucket *bucket, 430 int type) 431 { 432 int i, rc = 0; 433 434 for (i = 0; i < bucket->bu_blocks; i++) { 435 rc = ocfs2_journal_access(handle, 436 INODE_CACHE(bucket->bu_inode), 437 bucket->bu_bhs[i], type); 438 if (rc) { 439 mlog_errno(rc); 440 break; 441 } 442 } 443 444 return rc; 445 } 446 447 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 448 struct ocfs2_xattr_bucket *bucket) 449 { 450 int i; 451 452 
spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 453 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 454 bucket->bu_bhs, bucket->bu_blocks, 455 &bucket_xh(bucket)->xh_check); 456 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 457 458 for (i = 0; i < bucket->bu_blocks; i++) 459 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 460 } 461 462 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 463 struct ocfs2_xattr_bucket *src) 464 { 465 int i; 466 int blocksize = src->bu_inode->i_sb->s_blocksize; 467 468 BUG_ON(dest->bu_blocks != src->bu_blocks); 469 BUG_ON(dest->bu_inode != src->bu_inode); 470 471 for (i = 0; i < src->bu_blocks; i++) { 472 memcpy(bucket_block(dest, i), bucket_block(src, i), 473 blocksize); 474 } 475 } 476 477 static int ocfs2_validate_xattr_block(struct super_block *sb, 478 struct buffer_head *bh) 479 { 480 int rc; 481 struct ocfs2_xattr_block *xb = 482 (struct ocfs2_xattr_block *)bh->b_data; 483 484 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); 485 486 BUG_ON(!buffer_uptodate(bh)); 487 488 /* 489 * If the ecc fails, we return the error but otherwise 490 * leave the filesystem running. We know any error is 491 * local to this block. 
492 */ 493 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 494 if (rc) 495 return rc; 496 497 /* 498 * Errors after here are fatal 499 */ 500 501 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 502 return ocfs2_error(sb, 503 "Extended attribute block #%llu has bad signature %.*s\n", 504 (unsigned long long)bh->b_blocknr, 7, 505 xb->xb_signature); 506 } 507 508 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 509 return ocfs2_error(sb, 510 "Extended attribute block #%llu has an invalid xb_blkno of %llu\n", 511 (unsigned long long)bh->b_blocknr, 512 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 513 } 514 515 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 516 return ocfs2_error(sb, 517 "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n", 518 (unsigned long long)bh->b_blocknr, 519 le32_to_cpu(xb->xb_fs_generation)); 520 } 521 522 return 0; 523 } 524 525 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 526 struct buffer_head **bh) 527 { 528 int rc; 529 struct buffer_head *tmp = *bh; 530 531 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 532 ocfs2_validate_xattr_block); 533 534 /* If ocfs2_read_block() got us a new bh, pass it up. */ 535 if (!rc && !*bh) 536 *bh = tmp; 537 538 return rc; 539 } 540 541 static inline const char *ocfs2_xattr_prefix(int name_index) 542 { 543 const struct xattr_handler *handler = NULL; 544 545 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 546 handler = ocfs2_xattr_handler_map[name_index]; 547 return handler ? 
xattr_prefix(handler) : NULL; 548 } 549 550 static u32 ocfs2_xattr_name_hash(struct inode *inode, 551 const char *name, 552 int name_len) 553 { 554 /* Get hash value of uuid from super block */ 555 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 556 int i; 557 558 /* hash extended attribute name */ 559 for (i = 0; i < name_len; i++) { 560 hash = (hash << OCFS2_HASH_SHIFT) ^ 561 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 562 *name++; 563 } 564 565 return hash; 566 } 567 568 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 569 { 570 return namevalue_size(name_len, value_len) + 571 sizeof(struct ocfs2_xattr_entry); 572 } 573 574 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 575 { 576 return namevalue_size_xi(xi) + 577 sizeof(struct ocfs2_xattr_entry); 578 } 579 580 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 581 { 582 return namevalue_size_xe(xe) + 583 sizeof(struct ocfs2_xattr_entry); 584 } 585 586 int ocfs2_calc_security_init(struct inode *dir, 587 struct ocfs2_security_xattr_info *si, 588 int *want_clusters, 589 int *xattr_credits, 590 struct ocfs2_alloc_context **xattr_ac) 591 { 592 int ret = 0; 593 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 594 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 595 si->value_len); 596 597 /* 598 * The max space of security xattr taken inline is 599 * 256(name) + 80(value) + 16(entry) = 352 bytes, 600 * So reserve one metadata block for it is ok. 
601 */ 602 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 603 s_size > OCFS2_XATTR_FREE_IN_IBODY) { 604 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 605 if (ret) { 606 mlog_errno(ret); 607 return ret; 608 } 609 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 610 } 611 612 /* reserve clusters for xattr value which will be set in B tree*/ 613 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { 614 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 615 si->value_len); 616 617 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 618 new_clusters); 619 *want_clusters += new_clusters; 620 } 621 return ret; 622 } 623 624 int ocfs2_calc_xattr_init(struct inode *dir, 625 struct buffer_head *dir_bh, 626 umode_t mode, 627 struct ocfs2_security_xattr_info *si, 628 int *want_clusters, 629 int *xattr_credits, 630 int *want_meta) 631 { 632 int ret = 0; 633 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 634 int s_size = 0, a_size = 0, acl_len = 0, new_clusters; 635 636 if (si->enable) 637 s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 638 si->value_len); 639 640 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 641 down_read(&OCFS2_I(dir)->ip_xattr_sem); 642 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 643 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 644 "", NULL, 0); 645 up_read(&OCFS2_I(dir)->ip_xattr_sem); 646 if (acl_len > 0) { 647 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 648 if (S_ISDIR(mode)) 649 a_size <<= 1; 650 } else if (acl_len != 0 && acl_len != -ENODATA) { 651 ret = acl_len; 652 mlog_errno(ret); 653 return ret; 654 } 655 } 656 657 if (!(s_size + a_size)) 658 return ret; 659 660 /* 661 * The max space of security xattr taken inline is 662 * 256(name) + 80(value) + 16(entry) = 352 bytes, 663 * The max space of acl xattr taken inline is 664 * 80(value) + 16(entry) * 2(if directory) = 192 bytes, 665 * when blocksize = 512, may reserve one more cluser for 666 * xattr bucket, otherwise reserve one metadata block 667 * for them is ok. 
668 * If this is a new directory with inline data, 669 * we choose to reserve the entire inline area for 670 * directory contents and force an external xattr block. 671 */ 672 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 673 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || 674 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 675 *want_meta = *want_meta + 1; 676 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 677 } 678 679 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && 680 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { 681 *want_clusters += 1; 682 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); 683 } 684 685 /* 686 * reserve credits and clusters for xattrs which has large value 687 * and have to be set outside 688 */ 689 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { 690 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 691 si->value_len); 692 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 693 new_clusters); 694 *want_clusters += new_clusters; 695 } 696 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && 697 acl_len > OCFS2_XATTR_INLINE_SIZE) { 698 /* for directory, it has DEFAULT and ACCESS two types of acls */ 699 new_clusters = (S_ISDIR(mode) ? 
2 : 1) * 700 ocfs2_clusters_for_bytes(dir->i_sb, acl_len); 701 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 702 new_clusters); 703 *want_clusters += new_clusters; 704 } 705 706 return ret; 707 } 708 709 static int ocfs2_xattr_extend_allocation(struct inode *inode, 710 u32 clusters_to_add, 711 struct ocfs2_xattr_value_buf *vb, 712 struct ocfs2_xattr_set_ctxt *ctxt) 713 { 714 int status = 0, credits; 715 handle_t *handle = ctxt->handle; 716 enum ocfs2_alloc_restarted why; 717 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 718 struct ocfs2_extent_tree et; 719 720 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 721 722 while (clusters_to_add) { 723 trace_ocfs2_xattr_extend_allocation(clusters_to_add); 724 725 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 726 OCFS2_JOURNAL_ACCESS_WRITE); 727 if (status < 0) { 728 mlog_errno(status); 729 break; 730 } 731 732 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 733 status = ocfs2_add_clusters_in_btree(handle, 734 &et, 735 &logical_start, 736 clusters_to_add, 737 0, 738 ctxt->data_ac, 739 ctxt->meta_ac, 740 &why); 741 if ((status < 0) && (status != -EAGAIN)) { 742 if (status != -ENOSPC) 743 mlog_errno(status); 744 break; 745 } 746 747 ocfs2_journal_dirty(handle, vb->vb_bh); 748 749 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - 750 prev_clusters; 751 752 if (why != RESTART_NONE && clusters_to_add) { 753 /* 754 * We can only fail in case the alloc file doesn't give 755 * up enough clusters. 
756 */ 757 BUG_ON(why == RESTART_META); 758 759 credits = ocfs2_calc_extend_credits(inode->i_sb, 760 &vb->vb_xv->xr_list); 761 status = ocfs2_extend_trans(handle, credits); 762 if (status < 0) { 763 status = -ENOMEM; 764 mlog_errno(status); 765 break; 766 } 767 } 768 } 769 770 return status; 771 } 772 773 static int __ocfs2_remove_xattr_range(struct inode *inode, 774 struct ocfs2_xattr_value_buf *vb, 775 u32 cpos, u32 phys_cpos, u32 len, 776 unsigned int ext_flags, 777 struct ocfs2_xattr_set_ctxt *ctxt) 778 { 779 int ret; 780 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 781 handle_t *handle = ctxt->handle; 782 struct ocfs2_extent_tree et; 783 784 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 785 786 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 787 OCFS2_JOURNAL_ACCESS_WRITE); 788 if (ret) { 789 mlog_errno(ret); 790 goto out; 791 } 792 793 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, 794 &ctxt->dealloc); 795 if (ret) { 796 mlog_errno(ret); 797 goto out; 798 } 799 800 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 801 ocfs2_journal_dirty(handle, vb->vb_bh); 802 803 if (ext_flags & OCFS2_EXT_REFCOUNTED) 804 ret = ocfs2_decrease_refcount(inode, handle, 805 ocfs2_blocks_to_clusters(inode->i_sb, 806 phys_blkno), 807 len, ctxt->meta_ac, &ctxt->dealloc, 1); 808 else 809 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, 810 phys_blkno, len); 811 if (ret) 812 mlog_errno(ret); 813 814 out: 815 return ret; 816 } 817 818 static int ocfs2_xattr_shrink_size(struct inode *inode, 819 u32 old_clusters, 820 u32 new_clusters, 821 struct ocfs2_xattr_value_buf *vb, 822 struct ocfs2_xattr_set_ctxt *ctxt) 823 { 824 int ret = 0; 825 unsigned int ext_flags; 826 u32 trunc_len, cpos, phys_cpos, alloc_size; 827 u64 block; 828 829 if (old_clusters <= new_clusters) 830 return 0; 831 832 cpos = new_clusters; 833 trunc_len = old_clusters - new_clusters; 834 while (trunc_len) { 835 ret = ocfs2_xattr_get_clusters(inode, cpos, 
&phys_cpos, 836 &alloc_size, 837 &vb->vb_xv->xr_list, &ext_flags); 838 if (ret) { 839 mlog_errno(ret); 840 goto out; 841 } 842 843 if (alloc_size > trunc_len) 844 alloc_size = trunc_len; 845 846 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 847 phys_cpos, alloc_size, 848 ext_flags, ctxt); 849 if (ret) { 850 mlog_errno(ret); 851 goto out; 852 } 853 854 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 855 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 856 block, alloc_size); 857 cpos += alloc_size; 858 trunc_len -= alloc_size; 859 } 860 861 out: 862 return ret; 863 } 864 865 static int ocfs2_xattr_value_truncate(struct inode *inode, 866 struct ocfs2_xattr_value_buf *vb, 867 int len, 868 struct ocfs2_xattr_set_ctxt *ctxt) 869 { 870 int ret; 871 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 872 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 873 874 if (new_clusters == old_clusters) 875 return 0; 876 877 if (new_clusters > old_clusters) 878 ret = ocfs2_xattr_extend_allocation(inode, 879 new_clusters - old_clusters, 880 vb, ctxt); 881 else 882 ret = ocfs2_xattr_shrink_size(inode, 883 old_clusters, new_clusters, 884 vb, ctxt); 885 886 return ret; 887 } 888 889 static int ocfs2_xattr_list_entry(struct super_block *sb, 890 char *buffer, size_t size, 891 size_t *result, int type, 892 const char *name, int name_len) 893 { 894 char *p = buffer + *result; 895 const char *prefix; 896 int prefix_len; 897 int total_len; 898 899 switch(type) { 900 case OCFS2_XATTR_INDEX_USER: 901 if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 902 return 0; 903 break; 904 905 case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS: 906 case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT: 907 if (!(sb->s_flags & SB_POSIXACL)) 908 return 0; 909 break; 910 911 case OCFS2_XATTR_INDEX_TRUSTED: 912 if (!capable(CAP_SYS_ADMIN)) 913 return 0; 914 break; 915 } 916 917 prefix = ocfs2_xattr_prefix(type); 918 if (!prefix) 919 return 0; 920 prefix_len = strlen(prefix); 921 
total_len = prefix_len + name_len + 1; 922 *result += total_len; 923 924 /* we are just looking for how big our buffer needs to be */ 925 if (!size) 926 return 0; 927 928 if (*result > size) 929 return -ERANGE; 930 931 memcpy(p, prefix, prefix_len); 932 memcpy(p + prefix_len, name, name_len); 933 p[prefix_len + name_len] = '\0'; 934 935 return 0; 936 } 937 938 static int ocfs2_xattr_list_entries(struct inode *inode, 939 struct ocfs2_xattr_header *header, 940 char *buffer, size_t buffer_size) 941 { 942 size_t result = 0; 943 int i, type, ret; 944 const char *name; 945 946 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 947 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 948 type = ocfs2_xattr_get_type(entry); 949 name = (const char *)header + 950 le16_to_cpu(entry->xe_name_offset); 951 952 ret = ocfs2_xattr_list_entry(inode->i_sb, 953 buffer, buffer_size, 954 &result, type, name, 955 entry->xe_name_len); 956 if (ret) 957 return ret; 958 } 959 960 return result; 961 } 962 963 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 964 struct ocfs2_dinode *di) 965 { 966 struct ocfs2_xattr_header *xh; 967 int i; 968 969 xh = (struct ocfs2_xattr_header *) 970 ((void *)di + inode->i_sb->s_blocksize - 971 le16_to_cpu(di->i_xattr_inline_size)); 972 973 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 974 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 975 return 1; 976 977 return 0; 978 } 979 980 static int ocfs2_xattr_ibody_list(struct inode *inode, 981 struct ocfs2_dinode *di, 982 char *buffer, 983 size_t buffer_size) 984 { 985 struct ocfs2_xattr_header *header = NULL; 986 struct ocfs2_inode_info *oi = OCFS2_I(inode); 987 int ret = 0; 988 989 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 990 return ret; 991 992 header = (struct ocfs2_xattr_header *) 993 ((void *)di + inode->i_sb->s_blocksize - 994 le16_to_cpu(di->i_xattr_inline_size)); 995 996 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 997 998 return ret; 999 } 1000 
1001 static int ocfs2_xattr_block_list(struct inode *inode, 1002 struct ocfs2_dinode *di, 1003 char *buffer, 1004 size_t buffer_size) 1005 { 1006 struct buffer_head *blk_bh = NULL; 1007 struct ocfs2_xattr_block *xb; 1008 int ret = 0; 1009 1010 if (!di->i_xattr_loc) 1011 return ret; 1012 1013 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 1014 &blk_bh); 1015 if (ret < 0) { 1016 mlog_errno(ret); 1017 return ret; 1018 } 1019 1020 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1021 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1022 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 1023 ret = ocfs2_xattr_list_entries(inode, header, 1024 buffer, buffer_size); 1025 } else 1026 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1027 buffer, buffer_size); 1028 1029 brelse(blk_bh); 1030 1031 return ret; 1032 } 1033 1034 ssize_t ocfs2_listxattr(struct dentry *dentry, 1035 char *buffer, 1036 size_t size) 1037 { 1038 int ret = 0, i_ret = 0, b_ret = 0; 1039 struct buffer_head *di_bh = NULL; 1040 struct ocfs2_dinode *di = NULL; 1041 struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry)); 1042 1043 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) 1044 return -EOPNOTSUPP; 1045 1046 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1047 return ret; 1048 1049 ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0); 1050 if (ret < 0) { 1051 mlog_errno(ret); 1052 return ret; 1053 } 1054 1055 di = (struct ocfs2_dinode *)di_bh->b_data; 1056 1057 down_read(&oi->ip_xattr_sem); 1058 i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size); 1059 if (i_ret < 0) 1060 b_ret = 0; 1061 else { 1062 if (buffer) { 1063 buffer += i_ret; 1064 size -= i_ret; 1065 } 1066 b_ret = ocfs2_xattr_block_list(d_inode(dentry), di, 1067 buffer, size); 1068 if (b_ret < 0) 1069 i_ret = 0; 1070 } 1071 up_read(&oi->ip_xattr_sem); 1072 ocfs2_inode_unlock(d_inode(dentry), 0); 1073 1074 brelse(di_bh); 1075 1076 return i_ret + b_ret; 1077 } 1078 1079 static int 
ocfs2_xattr_find_entry(int name_index, 1080 const char *name, 1081 struct ocfs2_xattr_search *xs) 1082 { 1083 struct ocfs2_xattr_entry *entry; 1084 size_t name_len; 1085 int i, cmp = 1; 1086 1087 if (name == NULL) 1088 return -EINVAL; 1089 1090 name_len = strlen(name); 1091 entry = xs->here; 1092 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 1093 cmp = name_index - ocfs2_xattr_get_type(entry); 1094 if (!cmp) 1095 cmp = name_len - entry->xe_name_len; 1096 if (!cmp) 1097 cmp = memcmp(name, (xs->base + 1098 le16_to_cpu(entry->xe_name_offset)), 1099 name_len); 1100 if (cmp == 0) 1101 break; 1102 entry += 1; 1103 } 1104 xs->here = entry; 1105 1106 return cmp ? -ENODATA : 0; 1107 } 1108 1109 static int ocfs2_xattr_get_value_outside(struct inode *inode, 1110 struct ocfs2_xattr_value_root *xv, 1111 void *buffer, 1112 size_t len) 1113 { 1114 u32 cpos, p_cluster, num_clusters, bpc, clusters; 1115 u64 blkno; 1116 int i, ret = 0; 1117 size_t cplen, blocksize; 1118 struct buffer_head *bh = NULL; 1119 struct ocfs2_extent_list *el; 1120 1121 el = &xv->xr_list; 1122 clusters = le32_to_cpu(xv->xr_clusters); 1123 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1124 blocksize = inode->i_sb->s_blocksize; 1125 1126 cpos = 0; 1127 while (cpos < clusters) { 1128 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1129 &num_clusters, el, NULL); 1130 if (ret) { 1131 mlog_errno(ret); 1132 goto out; 1133 } 1134 1135 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1136 /* Copy ocfs2_xattr_value */ 1137 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1138 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1139 &bh, NULL); 1140 if (ret) { 1141 mlog_errno(ret); 1142 goto out; 1143 } 1144 1145 cplen = len >= blocksize ? 
blocksize : len; 1146 memcpy(buffer, bh->b_data, cplen); 1147 len -= cplen; 1148 buffer += cplen; 1149 1150 brelse(bh); 1151 bh = NULL; 1152 if (len == 0) 1153 break; 1154 } 1155 cpos += num_clusters; 1156 } 1157 out: 1158 return ret; 1159 } 1160 1161 static int ocfs2_xattr_ibody_get(struct inode *inode, 1162 int name_index, 1163 const char *name, 1164 void *buffer, 1165 size_t buffer_size, 1166 struct ocfs2_xattr_search *xs) 1167 { 1168 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1169 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1170 struct ocfs2_xattr_value_root *xv; 1171 size_t size; 1172 int ret = 0; 1173 1174 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 1175 return -ENODATA; 1176 1177 xs->end = (void *)di + inode->i_sb->s_blocksize; 1178 xs->header = (struct ocfs2_xattr_header *) 1179 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 1180 xs->base = (void *)xs->header; 1181 xs->here = xs->header->xh_entries; 1182 1183 ret = ocfs2_xattr_find_entry(name_index, name, xs); 1184 if (ret) 1185 return ret; 1186 size = le64_to_cpu(xs->here->xe_value_size); 1187 if (buffer) { 1188 if (size > buffer_size) 1189 return -ERANGE; 1190 if (ocfs2_xattr_is_local(xs->here)) { 1191 memcpy(buffer, (void *)xs->base + 1192 le16_to_cpu(xs->here->xe_name_offset) + 1193 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); 1194 } else { 1195 xv = (struct ocfs2_xattr_value_root *) 1196 (xs->base + le16_to_cpu( 1197 xs->here->xe_name_offset) + 1198 OCFS2_XATTR_SIZE(xs->here->xe_name_len)); 1199 ret = ocfs2_xattr_get_value_outside(inode, xv, 1200 buffer, size); 1201 if (ret < 0) { 1202 mlog_errno(ret); 1203 return ret; 1204 } 1205 } 1206 } 1207 1208 return size; 1209 } 1210 1211 static int ocfs2_xattr_block_get(struct inode *inode, 1212 int name_index, 1213 const char *name, 1214 void *buffer, 1215 size_t buffer_size, 1216 struct ocfs2_xattr_search *xs) 1217 { 1218 struct ocfs2_xattr_block *xb; 1219 struct ocfs2_xattr_value_root *xv; 1220 size_t size; 1221 
int ret = -ENODATA, name_offset, name_len, i; 1222 int uninitialized_var(block_off); 1223 1224 xs->bucket = ocfs2_xattr_bucket_new(inode); 1225 if (!xs->bucket) { 1226 ret = -ENOMEM; 1227 mlog_errno(ret); 1228 goto cleanup; 1229 } 1230 1231 ret = ocfs2_xattr_block_find(inode, name_index, name, xs); 1232 if (ret) { 1233 mlog_errno(ret); 1234 goto cleanup; 1235 } 1236 1237 if (xs->not_found) { 1238 ret = -ENODATA; 1239 goto cleanup; 1240 } 1241 1242 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 1243 size = le64_to_cpu(xs->here->xe_value_size); 1244 if (buffer) { 1245 ret = -ERANGE; 1246 if (size > buffer_size) 1247 goto cleanup; 1248 1249 name_offset = le16_to_cpu(xs->here->xe_name_offset); 1250 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); 1251 i = xs->here - xs->header->xh_entries; 1252 1253 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1254 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 1255 bucket_xh(xs->bucket), 1256 i, 1257 &block_off, 1258 &name_offset); 1259 if (ret) { 1260 mlog_errno(ret); 1261 goto cleanup; 1262 } 1263 xs->base = bucket_block(xs->bucket, block_off); 1264 } 1265 if (ocfs2_xattr_is_local(xs->here)) { 1266 memcpy(buffer, (void *)xs->base + 1267 name_offset + name_len, size); 1268 } else { 1269 xv = (struct ocfs2_xattr_value_root *) 1270 (xs->base + name_offset + name_len); 1271 ret = ocfs2_xattr_get_value_outside(inode, xv, 1272 buffer, size); 1273 if (ret < 0) { 1274 mlog_errno(ret); 1275 goto cleanup; 1276 } 1277 } 1278 } 1279 ret = size; 1280 cleanup: 1281 ocfs2_xattr_bucket_free(xs->bucket); 1282 1283 brelse(xs->xattr_bh); 1284 xs->xattr_bh = NULL; 1285 return ret; 1286 } 1287 1288 int ocfs2_xattr_get_nolock(struct inode *inode, 1289 struct buffer_head *di_bh, 1290 int name_index, 1291 const char *name, 1292 void *buffer, 1293 size_t buffer_size) 1294 { 1295 int ret; 1296 struct ocfs2_dinode *di = NULL; 1297 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1298 struct ocfs2_xattr_search xis = { 1299 .not_found = 
-ENODATA, 1300 }; 1301 struct ocfs2_xattr_search xbs = { 1302 .not_found = -ENODATA, 1303 }; 1304 1305 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 1306 return -EOPNOTSUPP; 1307 1308 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1309 return -ENODATA; 1310 1311 xis.inode_bh = xbs.inode_bh = di_bh; 1312 di = (struct ocfs2_dinode *)di_bh->b_data; 1313 1314 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 1315 buffer_size, &xis); 1316 if (ret == -ENODATA && di->i_xattr_loc) 1317 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1318 buffer_size, &xbs); 1319 1320 return ret; 1321 } 1322 1323 /* ocfs2_xattr_get() 1324 * 1325 * Copy an extended attribute into the buffer provided. 1326 * Buffer is NULL to compute the size of buffer required. 1327 */ 1328 static int ocfs2_xattr_get(struct inode *inode, 1329 int name_index, 1330 const char *name, 1331 void *buffer, 1332 size_t buffer_size) 1333 { 1334 int ret, had_lock; 1335 struct buffer_head *di_bh = NULL; 1336 struct ocfs2_lock_holder oh; 1337 1338 had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh); 1339 if (had_lock < 0) { 1340 mlog_errno(had_lock); 1341 return had_lock; 1342 } 1343 down_read(&OCFS2_I(inode)->ip_xattr_sem); 1344 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, 1345 name, buffer, buffer_size); 1346 up_read(&OCFS2_I(inode)->ip_xattr_sem); 1347 1348 ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); 1349 1350 brelse(di_bh); 1351 1352 return ret; 1353 } 1354 1355 static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1356 handle_t *handle, 1357 struct ocfs2_xattr_value_buf *vb, 1358 const void *value, 1359 int value_len) 1360 { 1361 int ret = 0, i, cp_len; 1362 u16 blocksize = inode->i_sb->s_blocksize; 1363 u32 p_cluster, num_clusters; 1364 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1365 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1366 u64 blkno; 1367 struct buffer_head *bh = NULL; 1368 unsigned int ext_flags; 
1369 struct ocfs2_xattr_value_root *xv = vb->vb_xv; 1370 1371 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); 1372 1373 while (cpos < clusters) { 1374 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1375 &num_clusters, &xv->xr_list, 1376 &ext_flags); 1377 if (ret) { 1378 mlog_errno(ret); 1379 goto out; 1380 } 1381 1382 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 1383 1384 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1385 1386 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1387 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1388 &bh, NULL); 1389 if (ret) { 1390 mlog_errno(ret); 1391 goto out; 1392 } 1393 1394 ret = ocfs2_journal_access(handle, 1395 INODE_CACHE(inode), 1396 bh, 1397 OCFS2_JOURNAL_ACCESS_WRITE); 1398 if (ret < 0) { 1399 mlog_errno(ret); 1400 goto out; 1401 } 1402 1403 cp_len = value_len > blocksize ? blocksize : value_len; 1404 memcpy(bh->b_data, value, cp_len); 1405 value_len -= cp_len; 1406 value += cp_len; 1407 if (cp_len < blocksize) 1408 memset(bh->b_data + cp_len, 0, 1409 blocksize - cp_len); 1410 1411 ocfs2_journal_dirty(handle, bh); 1412 brelse(bh); 1413 bh = NULL; 1414 1415 /* 1416 * XXX: do we need to empty all the following 1417 * blocks in this cluster? 
1418 */ 1419 if (!value_len) 1420 break; 1421 } 1422 cpos += num_clusters; 1423 } 1424 out: 1425 brelse(bh); 1426 1427 return ret; 1428 } 1429 1430 static int ocfs2_xa_check_space_helper(int needed_space, int free_start, 1431 int num_entries) 1432 { 1433 int free_space; 1434 1435 if (!needed_space) 1436 return 0; 1437 1438 free_space = free_start - 1439 sizeof(struct ocfs2_xattr_header) - 1440 (num_entries * sizeof(struct ocfs2_xattr_entry)) - 1441 OCFS2_XATTR_HEADER_GAP; 1442 if (free_space < 0) 1443 return -EIO; 1444 if (free_space < needed_space) 1445 return -ENOSPC; 1446 1447 return 0; 1448 } 1449 1450 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, 1451 int type) 1452 { 1453 return loc->xl_ops->xlo_journal_access(handle, loc, type); 1454 } 1455 1456 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) 1457 { 1458 loc->xl_ops->xlo_journal_dirty(handle, loc); 1459 } 1460 1461 /* Give a pointer into the storage for the given offset */ 1462 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) 1463 { 1464 BUG_ON(offset >= loc->xl_size); 1465 return loc->xl_ops->xlo_offset_pointer(loc, offset); 1466 } 1467 1468 /* 1469 * Wipe the name+value pair and allow the storage to reclaim it. This 1470 * must be followed by either removal of the entry or a call to 1471 * ocfs2_xa_add_namevalue(). 1472 */ 1473 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) 1474 { 1475 loc->xl_ops->xlo_wipe_namevalue(loc); 1476 } 1477 1478 /* 1479 * Find lowest offset to a name+value pair. This is the start of our 1480 * downward-growing free space. 1481 */ 1482 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) 1483 { 1484 return loc->xl_ops->xlo_get_free_start(loc); 1485 } 1486 1487 /* Can we reuse loc->xl_entry for xi? 
*/ 1488 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, 1489 struct ocfs2_xattr_info *xi) 1490 { 1491 return loc->xl_ops->xlo_can_reuse(loc, xi); 1492 } 1493 1494 /* How much free space is needed to set the new value */ 1495 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, 1496 struct ocfs2_xattr_info *xi) 1497 { 1498 return loc->xl_ops->xlo_check_space(loc, xi); 1499 } 1500 1501 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1502 { 1503 loc->xl_ops->xlo_add_entry(loc, name_hash); 1504 loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); 1505 /* 1506 * We can't leave the new entry's xe_name_offset at zero or 1507 * add_namevalue() will go nuts. We set it to the size of our 1508 * storage so that it can never be less than any other entry. 1509 */ 1510 loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); 1511 } 1512 1513 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, 1514 struct ocfs2_xattr_info *xi) 1515 { 1516 int size = namevalue_size_xi(xi); 1517 int nameval_offset; 1518 char *nameval_buf; 1519 1520 loc->xl_ops->xlo_add_namevalue(loc, size); 1521 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1522 loc->xl_entry->xe_name_len = xi->xi_name_len; 1523 ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); 1524 ocfs2_xattr_set_local(loc->xl_entry, 1525 xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); 1526 1527 nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1528 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 1529 memset(nameval_buf, 0, size); 1530 memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); 1531 } 1532 1533 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, 1534 struct ocfs2_xattr_value_buf *vb) 1535 { 1536 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1537 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1538 1539 /* Value bufs are for value trees */ 1540 BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); 1541 
BUG_ON(namevalue_size_xe(loc->xl_entry) != 1542 (name_size + OCFS2_XATTR_ROOT_SIZE)); 1543 1544 loc->xl_ops->xlo_fill_value_buf(loc, vb); 1545 vb->vb_xv = 1546 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, 1547 nameval_offset + 1548 name_size); 1549 } 1550 1551 static int ocfs2_xa_block_journal_access(handle_t *handle, 1552 struct ocfs2_xa_loc *loc, int type) 1553 { 1554 struct buffer_head *bh = loc->xl_storage; 1555 ocfs2_journal_access_func access; 1556 1557 if (loc->xl_size == (bh->b_size - 1558 offsetof(struct ocfs2_xattr_block, 1559 xb_attrs.xb_header))) 1560 access = ocfs2_journal_access_xb; 1561 else 1562 access = ocfs2_journal_access_di; 1563 return access(handle, INODE_CACHE(loc->xl_inode), bh, type); 1564 } 1565 1566 static void ocfs2_xa_block_journal_dirty(handle_t *handle, 1567 struct ocfs2_xa_loc *loc) 1568 { 1569 struct buffer_head *bh = loc->xl_storage; 1570 1571 ocfs2_journal_dirty(handle, bh); 1572 } 1573 1574 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, 1575 int offset) 1576 { 1577 return (char *)loc->xl_header + offset; 1578 } 1579 1580 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, 1581 struct ocfs2_xattr_info *xi) 1582 { 1583 /* 1584 * Block storage is strict. If the sizes aren't exact, we will 1585 * remove the old one and reinsert the new. 
1586 */ 1587 return namevalue_size_xe(loc->xl_entry) == 1588 namevalue_size_xi(xi); 1589 } 1590 1591 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) 1592 { 1593 struct ocfs2_xattr_header *xh = loc->xl_header; 1594 int i, count = le16_to_cpu(xh->xh_count); 1595 int offset, free_start = loc->xl_size; 1596 1597 for (i = 0; i < count; i++) { 1598 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1599 if (offset < free_start) 1600 free_start = offset; 1601 } 1602 1603 return free_start; 1604 } 1605 1606 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, 1607 struct ocfs2_xattr_info *xi) 1608 { 1609 int count = le16_to_cpu(loc->xl_header->xh_count); 1610 int free_start = ocfs2_xa_get_free_start(loc); 1611 int needed_space = ocfs2_xi_entry_usage(xi); 1612 1613 /* 1614 * Block storage will reclaim the original entry before inserting 1615 * the new value, so we only need the difference. If the new 1616 * entry is smaller than the old one, we don't need anything. 1617 */ 1618 if (loc->xl_entry) { 1619 /* Don't need space if we're reusing! */ 1620 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1621 needed_space = 0; 1622 else 1623 needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); 1624 } 1625 if (needed_space < 0) 1626 needed_space = 0; 1627 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1628 } 1629 1630 /* 1631 * Block storage for xattrs keeps the name+value pairs compacted. When 1632 * we remove one, we have to shift any that preceded it towards the end. 
1633 */ 1634 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) 1635 { 1636 int i, offset; 1637 int namevalue_offset, first_namevalue_offset, namevalue_size; 1638 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1639 struct ocfs2_xattr_header *xh = loc->xl_header; 1640 int count = le16_to_cpu(xh->xh_count); 1641 1642 namevalue_offset = le16_to_cpu(entry->xe_name_offset); 1643 namevalue_size = namevalue_size_xe(entry); 1644 first_namevalue_offset = ocfs2_xa_get_free_start(loc); 1645 1646 /* Shift the name+value pairs */ 1647 memmove((char *)xh + first_namevalue_offset + namevalue_size, 1648 (char *)xh + first_namevalue_offset, 1649 namevalue_offset - first_namevalue_offset); 1650 memset((char *)xh + first_namevalue_offset, 0, namevalue_size); 1651 1652 /* Now tell xh->xh_entries about it */ 1653 for (i = 0; i < count; i++) { 1654 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1655 if (offset <= namevalue_offset) 1656 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1657 namevalue_size); 1658 } 1659 1660 /* 1661 * Note that we don't update xh_free_start or xh_name_value_len 1662 * because they're not used in block-stored xattrs. 
1663 */ 1664 } 1665 1666 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1667 { 1668 int count = le16_to_cpu(loc->xl_header->xh_count); 1669 loc->xl_entry = &(loc->xl_header->xh_entries[count]); 1670 le16_add_cpu(&loc->xl_header->xh_count, 1); 1671 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1672 } 1673 1674 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1675 { 1676 int free_start = ocfs2_xa_get_free_start(loc); 1677 1678 loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); 1679 } 1680 1681 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, 1682 struct ocfs2_xattr_value_buf *vb) 1683 { 1684 struct buffer_head *bh = loc->xl_storage; 1685 1686 if (loc->xl_size == (bh->b_size - 1687 offsetof(struct ocfs2_xattr_block, 1688 xb_attrs.xb_header))) 1689 vb->vb_access = ocfs2_journal_access_xb; 1690 else 1691 vb->vb_access = ocfs2_journal_access_di; 1692 vb->vb_bh = bh; 1693 } 1694 1695 /* 1696 * Operations for xattrs stored in blocks. This includes inline inode 1697 * storage and unindexed ocfs2_xattr_blocks. 
1698 */ 1699 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { 1700 .xlo_journal_access = ocfs2_xa_block_journal_access, 1701 .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, 1702 .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, 1703 .xlo_check_space = ocfs2_xa_block_check_space, 1704 .xlo_can_reuse = ocfs2_xa_block_can_reuse, 1705 .xlo_get_free_start = ocfs2_xa_block_get_free_start, 1706 .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, 1707 .xlo_add_entry = ocfs2_xa_block_add_entry, 1708 .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, 1709 .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, 1710 }; 1711 1712 static int ocfs2_xa_bucket_journal_access(handle_t *handle, 1713 struct ocfs2_xa_loc *loc, int type) 1714 { 1715 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1716 1717 return ocfs2_xattr_bucket_journal_access(handle, bucket, type); 1718 } 1719 1720 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, 1721 struct ocfs2_xa_loc *loc) 1722 { 1723 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1724 1725 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 1726 } 1727 1728 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, 1729 int offset) 1730 { 1731 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1732 int block, block_offset; 1733 1734 /* The header is at the front of the bucket */ 1735 block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; 1736 block_offset = offset % loc->xl_inode->i_sb->s_blocksize; 1737 1738 return bucket_block(bucket, block) + block_offset; 1739 } 1740 1741 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, 1742 struct ocfs2_xattr_info *xi) 1743 { 1744 return namevalue_size_xe(loc->xl_entry) >= 1745 namevalue_size_xi(xi); 1746 } 1747 1748 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) 1749 { 1750 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1751 return le16_to_cpu(bucket_xh(bucket)->xh_free_start); 1752 } 1753 1754 static 
int ocfs2_bucket_align_free_start(struct super_block *sb, 1755 int free_start, int size) 1756 { 1757 /* 1758 * We need to make sure that the name+value pair fits within 1759 * one block. 1760 */ 1761 if (((free_start - size) >> sb->s_blocksize_bits) != 1762 ((free_start - 1) >> sb->s_blocksize_bits)) 1763 free_start -= free_start % sb->s_blocksize; 1764 1765 return free_start; 1766 } 1767 1768 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, 1769 struct ocfs2_xattr_info *xi) 1770 { 1771 int rc; 1772 int count = le16_to_cpu(loc->xl_header->xh_count); 1773 int free_start = ocfs2_xa_get_free_start(loc); 1774 int needed_space = ocfs2_xi_entry_usage(xi); 1775 int size = namevalue_size_xi(xi); 1776 struct super_block *sb = loc->xl_inode->i_sb; 1777 1778 /* 1779 * Bucket storage does not reclaim name+value pairs it cannot 1780 * reuse. They live as holes until the bucket fills, and then 1781 * the bucket is defragmented. However, the bucket can reclaim 1782 * the ocfs2_xattr_entry. 1783 */ 1784 if (loc->xl_entry) { 1785 /* Don't need space if we're reusing! */ 1786 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1787 needed_space = 0; 1788 else 1789 needed_space -= sizeof(struct ocfs2_xattr_entry); 1790 } 1791 BUG_ON(needed_space < 0); 1792 1793 if (free_start < size) { 1794 if (needed_space) 1795 return -ENOSPC; 1796 } else { 1797 /* 1798 * First we check if it would fit in the first place. 1799 * Below, we align the free start to a block. This may 1800 * slide us below the minimum gap. By checking unaligned 1801 * first, we avoid that error. 
1802 */ 1803 rc = ocfs2_xa_check_space_helper(needed_space, free_start, 1804 count); 1805 if (rc) 1806 return rc; 1807 free_start = ocfs2_bucket_align_free_start(sb, free_start, 1808 size); 1809 } 1810 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1811 } 1812 1813 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) 1814 { 1815 le16_add_cpu(&loc->xl_header->xh_name_value_len, 1816 -namevalue_size_xe(loc->xl_entry)); 1817 } 1818 1819 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1820 { 1821 struct ocfs2_xattr_header *xh = loc->xl_header; 1822 int count = le16_to_cpu(xh->xh_count); 1823 int low = 0, high = count - 1, tmp; 1824 struct ocfs2_xattr_entry *tmp_xe; 1825 1826 /* 1827 * We keep buckets sorted by name_hash, so we need to find 1828 * our insert place. 1829 */ 1830 while (low <= high && count) { 1831 tmp = (low + high) / 2; 1832 tmp_xe = &xh->xh_entries[tmp]; 1833 1834 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 1835 low = tmp + 1; 1836 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) 1837 high = tmp - 1; 1838 else { 1839 low = tmp; 1840 break; 1841 } 1842 } 1843 1844 if (low != count) 1845 memmove(&xh->xh_entries[low + 1], 1846 &xh->xh_entries[low], 1847 ((count - low) * sizeof(struct ocfs2_xattr_entry))); 1848 1849 le16_add_cpu(&xh->xh_count, 1); 1850 loc->xl_entry = &xh->xh_entries[low]; 1851 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1852 } 1853 1854 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1855 { 1856 int free_start = ocfs2_xa_get_free_start(loc); 1857 struct ocfs2_xattr_header *xh = loc->xl_header; 1858 struct super_block *sb = loc->xl_inode->i_sb; 1859 int nameval_offset; 1860 1861 free_start = ocfs2_bucket_align_free_start(sb, free_start, size); 1862 nameval_offset = free_start - size; 1863 loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); 1864 xh->xh_free_start = cpu_to_le16(nameval_offset); 1865 
le16_add_cpu(&xh->xh_name_value_len, size); 1866 1867 } 1868 1869 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, 1870 struct ocfs2_xattr_value_buf *vb) 1871 { 1872 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1873 struct super_block *sb = loc->xl_inode->i_sb; 1874 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1875 int size = namevalue_size_xe(loc->xl_entry); 1876 int block_offset = nameval_offset >> sb->s_blocksize_bits; 1877 1878 /* Values are not allowed to straddle block boundaries */ 1879 BUG_ON(block_offset != 1880 ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); 1881 /* We expect the bucket to be filled in */ 1882 BUG_ON(!bucket->bu_bhs[block_offset]); 1883 1884 vb->vb_access = ocfs2_journal_access; 1885 vb->vb_bh = bucket->bu_bhs[block_offset]; 1886 } 1887 1888 /* Operations for xattrs stored in buckets. */ 1889 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { 1890 .xlo_journal_access = ocfs2_xa_bucket_journal_access, 1891 .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, 1892 .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, 1893 .xlo_check_space = ocfs2_xa_bucket_check_space, 1894 .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, 1895 .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, 1896 .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, 1897 .xlo_add_entry = ocfs2_xa_bucket_add_entry, 1898 .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, 1899 .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, 1900 }; 1901 1902 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) 1903 { 1904 struct ocfs2_xattr_value_buf vb; 1905 1906 if (ocfs2_xattr_is_local(loc->xl_entry)) 1907 return 0; 1908 1909 ocfs2_xa_fill_value_buf(loc, &vb); 1910 return le32_to_cpu(vb.vb_xv->xr_clusters); 1911 } 1912 1913 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, 1914 struct ocfs2_xattr_set_ctxt *ctxt) 1915 { 1916 int trunc_rc, access_rc; 1917 struct 
ocfs2_xattr_value_buf vb; 1918 1919 ocfs2_xa_fill_value_buf(loc, &vb); 1920 trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1921 ctxt); 1922 1923 /* 1924 * The caller of ocfs2_xa_value_truncate() has already called 1925 * ocfs2_xa_journal_access on the loc. However, The truncate code 1926 * calls ocfs2_extend_trans(). This may commit the previous 1927 * transaction and open a new one. If this is a bucket, truncate 1928 * could leave only vb->vb_bh set up for journaling. Meanwhile, 1929 * the caller is expecting to dirty the entire bucket. So we must 1930 * reset the journal work. We do this even if truncate has failed, 1931 * as it could have failed after committing the extend. 1932 */ 1933 access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1934 OCFS2_JOURNAL_ACCESS_WRITE); 1935 1936 /* Errors in truncate take precedence */ 1937 return trunc_rc ? trunc_rc : access_rc; 1938 } 1939 1940 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1941 { 1942 int index, count; 1943 struct ocfs2_xattr_header *xh = loc->xl_header; 1944 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1945 1946 ocfs2_xa_wipe_namevalue(loc); 1947 loc->xl_entry = NULL; 1948 1949 le16_add_cpu(&xh->xh_count, -1); 1950 count = le16_to_cpu(xh->xh_count); 1951 1952 /* 1953 * Only zero out the entry if there are more remaining. This is 1954 * important for an empty bucket, as it keeps track of the 1955 * bucket's hash value. It doesn't hurt empty block storage. 
1956 */ 1957 if (count) { 1958 index = ((char *)entry - (char *)&xh->xh_entries) / 1959 sizeof(struct ocfs2_xattr_entry); 1960 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1961 (count - index) * sizeof(struct ocfs2_xattr_entry)); 1962 memset(&xh->xh_entries[count], 0, 1963 sizeof(struct ocfs2_xattr_entry)); 1964 } 1965 } 1966 1967 /* 1968 * If we have a problem adjusting the size of an external value during 1969 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 1970 * in an intermediate state. For example, the value may be partially 1971 * truncated. 1972 * 1973 * If the value tree hasn't changed, the extend/truncate went nowhere. 1974 * We have nothing to do. The caller can treat it as a straight error. 1975 * 1976 * If the value tree got partially truncated, we now have a corrupted 1977 * extended attribute. We're going to wipe its entry and leak the 1978 * clusters. Better to leak some storage than leave a corrupt entry. 1979 * 1980 * If the value tree grew, it obviously didn't grow enough for the 1981 * new entry. We're not going to try and reclaim those clusters either. 1982 * If there was already an external value there (orig_clusters != 0), 1983 * the new clusters are attached safely and we can just leave the old 1984 * value in place. If there was no external value there, we remove 1985 * the entry. 1986 * 1987 * This way, the xattr block we store in the journal will be consistent. 1988 * If the size change broke because of the journal, no changes will hit 1989 * disk anyway. 1990 */ 1991 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 1992 const char *what, 1993 unsigned int orig_clusters) 1994 { 1995 unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 1996 char *nameval_buf = ocfs2_xa_offset_pointer(loc, 1997 le16_to_cpu(loc->xl_entry->xe_name_offset)); 1998 1999 if (new_clusters < orig_clusters) { 2000 mlog(ML_ERROR, 2001 "Partial truncate while %s xattr %.*s. 
Leaking " 2002 "%u clusters and removing the entry\n", 2003 what, loc->xl_entry->xe_name_len, nameval_buf, 2004 orig_clusters - new_clusters); 2005 ocfs2_xa_remove_entry(loc); 2006 } else if (!orig_clusters) { 2007 mlog(ML_ERROR, 2008 "Unable to allocate an external value for xattr " 2009 "%.*s safely. Leaking %u clusters and removing the " 2010 "entry\n", 2011 loc->xl_entry->xe_name_len, nameval_buf, 2012 new_clusters - orig_clusters); 2013 ocfs2_xa_remove_entry(loc); 2014 } else if (new_clusters > orig_clusters) 2015 mlog(ML_ERROR, 2016 "Unable to grow xattr %.*s safely. %u new clusters " 2017 "have been added, but the value will not be " 2018 "modified\n", 2019 loc->xl_entry->xe_name_len, nameval_buf, 2020 new_clusters - orig_clusters); 2021 } 2022 2023 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 2024 struct ocfs2_xattr_set_ctxt *ctxt) 2025 { 2026 int rc = 0; 2027 unsigned int orig_clusters; 2028 2029 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2030 orig_clusters = ocfs2_xa_value_clusters(loc); 2031 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2032 if (rc) { 2033 mlog_errno(rc); 2034 /* 2035 * Since this is remove, we can return 0 if 2036 * ocfs2_xa_cleanup_value_truncate() is going to 2037 * wipe the entry anyway. So we check the 2038 * cluster count as well. 2039 */ 2040 if (orig_clusters != ocfs2_xa_value_clusters(loc)) 2041 rc = 0; 2042 ocfs2_xa_cleanup_value_truncate(loc, "removing", 2043 orig_clusters); 2044 if (rc) 2045 goto out; 2046 } 2047 } 2048 2049 ocfs2_xa_remove_entry(loc); 2050 2051 out: 2052 return rc; 2053 } 2054 2055 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 2056 { 2057 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 2058 char *nameval_buf; 2059 2060 nameval_buf = ocfs2_xa_offset_pointer(loc, 2061 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2062 memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 2063 } 2064 2065 /* 2066 * Take an existing entry and make it ready for the new value. 
/*
 * Take an existing entry and make it ready for the new value.  This
 * won't allocate space, but it may free space.  It should be ready for
 * ocfs2_xa_prepare_entry() to finish the work.
 *
 * The old and new names must have the same padded size (BUG_ON).
 * Returns 0 on success.  On a failed truncate the error is returned
 * after ocfs2_xa_cleanup_value_truncate() has run, and the entry's value
 * size and local flag are left unmodified by this function.
 */
static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi,
				struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
	unsigned int orig_clusters;
	char *nameval_buf;
	/* Is the old value stored locally?  Will the new one be? */
	int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
	int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;

	BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
	       name_size);

	nameval_buf = ocfs2_xa_offset_pointer(loc,
				le16_to_cpu(loc->xl_entry->xe_name_offset));
	if (xe_local) {
		/* Local old value: clear the value bytes after the name */
		memset(nameval_buf + name_size, 0,
		       namevalue_size_xe(loc->xl_entry) - name_size);
		/* Going external: start from an empty tree root */
		if (!xi_local)
			ocfs2_xa_install_value_root(loc);
	} else {
		/* Remembered so cleanup can tell how far a failure got */
		orig_clusters = ocfs2_xa_value_clusters(loc);
		if (xi_local) {
			/* External -> local: free the whole value tree */
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc < 0)
				mlog_errno(rc);
			else
				memset(nameval_buf + name_size, 0,
				       namevalue_size_xe(loc->xl_entry) -
				       name_size);
		} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
			   xi->xi_value_len) {
			/* External -> smaller external: shrink the tree */
			rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
						     ctxt);
			if (rc < 0)
				mlog_errno(rc);
		}

		if (rc) {
			ocfs2_xa_cleanup_value_truncate(loc, "reusing",
							orig_clusters);
			goto out;
		}
	}

	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
	ocfs2_xattr_set_local(loc->xl_entry, xi_local);

out:
	return rc;
}
/*
 * Prepares loc->xl_entry to receive the new xattr.  This includes
 * properly setting up the name+value pair region.  If loc->xl_entry
 * already exists, it will take care of modifying it appropriately.
 *
 * Note that this modifies the data.  You did journal_access already,
 * right?
 *
 * Returns 0 on success, the error from ocfs2_xa_check_space(), or the
 * error from a failed value truncate/grow.  A failed grow also sets
 * ctxt->set_abort.
 */
static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
				  struct ocfs2_xattr_info *xi,
				  u32 name_hash,
				  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	unsigned int orig_clusters;
	/* Only set on the reuse path; restored if growing fails */
	__le64 orig_value_size = 0;

	rc = ocfs2_xa_check_space(loc, xi);
	if (rc)
		goto out;

	if (loc->xl_entry) {
		if (ocfs2_xa_can_reuse_entry(loc, xi)) {
			orig_value_size = loc->xl_entry->xe_value_size;
			rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
			if (rc)
				goto out;
			/* The name+value region is already in place */
			goto alloc_value;
		}

		/* Can't reuse: free any external value, then the pair */
		if (!ocfs2_xattr_is_local(loc->xl_entry)) {
			orig_clusters = ocfs2_xa_value_clusters(loc);
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc) {
				mlog_errno(rc);
				ocfs2_xa_cleanup_value_truncate(loc,
								"overwriting",
								orig_clusters);
				goto out;
			}
		}
		ocfs2_xa_wipe_namevalue(loc);
	} else
		ocfs2_xa_add_entry(loc, name_hash);

	/*
	 * If we get here, we have a blank entry.  Fill it.  We grow our
	 * name+value pair back from the end.
	 */
	ocfs2_xa_add_namevalue(loc, xi);
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		ocfs2_xa_install_value_root(loc);

alloc_value:
	/* External values need their tree sized to the new length */
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		orig_clusters = ocfs2_xa_value_clusters(loc);
		rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
		if (rc < 0) {
			ctxt->set_abort = 1;
			ocfs2_xa_cleanup_value_truncate(loc, "growing",
							orig_clusters);
			/*
			 * If we were growing an existing value,
			 * ocfs2_xa_cleanup_value_truncate() won't remove
			 * the entry.  We need to restore the original value
			 * size.
			 */
			if (loc->xl_entry) {
				BUG_ON(!orig_value_size);
				loc->xl_entry->xe_value_size = orig_value_size;
			}
			mlog_errno(rc);
		}
	}

out:
	return rc;
}
This will skip 2204 * values that are stored externally. Their tree roots were set up 2205 * by ocfs2_xa_prepare_entry(). 2206 */ 2207 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, 2208 struct ocfs2_xattr_info *xi, 2209 struct ocfs2_xattr_set_ctxt *ctxt) 2210 { 2211 int rc = 0; 2212 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 2213 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2214 char *nameval_buf; 2215 struct ocfs2_xattr_value_buf vb; 2216 2217 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 2218 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2219 ocfs2_xa_fill_value_buf(loc, &vb); 2220 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, 2221 ctxt->handle, &vb, 2222 xi->xi_value, 2223 xi->xi_value_len); 2224 } else 2225 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); 2226 2227 return rc; 2228 } 2229 2230 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, 2231 struct ocfs2_xattr_info *xi, 2232 struct ocfs2_xattr_set_ctxt *ctxt) 2233 { 2234 int ret; 2235 u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, 2236 xi->xi_name_len); 2237 2238 ret = ocfs2_xa_journal_access(ctxt->handle, loc, 2239 OCFS2_JOURNAL_ACCESS_WRITE); 2240 if (ret) { 2241 mlog_errno(ret); 2242 goto out; 2243 } 2244 2245 /* 2246 * From here on out, everything is going to modify the buffer a 2247 * little. Errors are going to leave the xattr header in a 2248 * sane state. Thus, even with errors we dirty the sucker. 
2249 */ 2250 2251 /* Don't worry, we are never called with !xi_value and !xl_entry */ 2252 if (!xi->xi_value) { 2253 ret = ocfs2_xa_remove(loc, ctxt); 2254 goto out_dirty; 2255 } 2256 2257 ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); 2258 if (ret) { 2259 if (ret != -ENOSPC) 2260 mlog_errno(ret); 2261 goto out_dirty; 2262 } 2263 2264 ret = ocfs2_xa_store_value(loc, xi, ctxt); 2265 if (ret) 2266 mlog_errno(ret); 2267 2268 out_dirty: 2269 ocfs2_xa_journal_dirty(ctxt->handle, loc); 2270 2271 out: 2272 return ret; 2273 } 2274 2275 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, 2276 struct inode *inode, 2277 struct buffer_head *bh, 2278 struct ocfs2_xattr_entry *entry) 2279 { 2280 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2281 2282 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); 2283 2284 loc->xl_inode = inode; 2285 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2286 loc->xl_storage = bh; 2287 loc->xl_entry = entry; 2288 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); 2289 loc->xl_header = 2290 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - 2291 loc->xl_size); 2292 } 2293 2294 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, 2295 struct inode *inode, 2296 struct buffer_head *bh, 2297 struct ocfs2_xattr_entry *entry) 2298 { 2299 struct ocfs2_xattr_block *xb = 2300 (struct ocfs2_xattr_block *)bh->b_data; 2301 2302 BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); 2303 2304 loc->xl_inode = inode; 2305 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2306 loc->xl_storage = bh; 2307 loc->xl_header = &(xb->xb_attrs.xb_header); 2308 loc->xl_entry = entry; 2309 loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, 2310 xb_attrs.xb_header); 2311 } 2312 2313 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, 2314 struct ocfs2_xattr_bucket *bucket, 2315 struct ocfs2_xattr_entry *entry) 2316 { 2317 loc->xl_inode = bucket->bu_inode; 2318 loc->xl_ops = 
&ocfs2_xa_bucket_loc_ops; 2319 loc->xl_storage = bucket; 2320 loc->xl_header = bucket_xh(bucket); 2321 loc->xl_entry = entry; 2322 loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; 2323 } 2324 2325 /* 2326 * In xattr remove, if it is stored outside and refcounted, we may have 2327 * the chance to split the refcount tree. So need the allocators. 2328 */ 2329 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, 2330 struct ocfs2_xattr_value_root *xv, 2331 struct ocfs2_caching_info *ref_ci, 2332 struct buffer_head *ref_root_bh, 2333 struct ocfs2_alloc_context **meta_ac, 2334 int *ref_credits) 2335 { 2336 int ret, meta_add = 0; 2337 u32 p_cluster, num_clusters; 2338 unsigned int ext_flags; 2339 2340 *ref_credits = 0; 2341 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 2342 &num_clusters, 2343 &xv->xr_list, 2344 &ext_flags); 2345 if (ret) { 2346 mlog_errno(ret); 2347 goto out; 2348 } 2349 2350 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 2351 goto out; 2352 2353 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, 2354 ref_root_bh, xv, 2355 &meta_add, ref_credits); 2356 if (ret) { 2357 mlog_errno(ret); 2358 goto out; 2359 } 2360 2361 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2362 meta_add, meta_ac); 2363 if (ret) 2364 mlog_errno(ret); 2365 2366 out: 2367 return ret; 2368 } 2369 2370 static int ocfs2_remove_value_outside(struct inode*inode, 2371 struct ocfs2_xattr_value_buf *vb, 2372 struct ocfs2_xattr_header *header, 2373 struct ocfs2_caching_info *ref_ci, 2374 struct buffer_head *ref_root_bh) 2375 { 2376 int ret = 0, i, ref_credits; 2377 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2378 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2379 void *val; 2380 2381 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 2382 2383 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 2384 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 2385 2386 if (ocfs2_xattr_is_local(entry)) 2387 continue; 2388 2389 val = (void *)header + 2390 
le16_to_cpu(entry->xe_name_offset); 2391 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2392 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2393 2394 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2395 ref_ci, ref_root_bh, 2396 &ctxt.meta_ac, 2397 &ref_credits); 2398 2399 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2400 ocfs2_remove_extent_credits(osb->sb)); 2401 if (IS_ERR(ctxt.handle)) { 2402 ret = PTR_ERR(ctxt.handle); 2403 mlog_errno(ret); 2404 break; 2405 } 2406 2407 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2408 2409 ocfs2_commit_trans(osb, ctxt.handle); 2410 if (ctxt.meta_ac) { 2411 ocfs2_free_alloc_context(ctxt.meta_ac); 2412 ctxt.meta_ac = NULL; 2413 } 2414 2415 if (ret < 0) { 2416 mlog_errno(ret); 2417 break; 2418 } 2419 2420 } 2421 2422 if (ctxt.meta_ac) 2423 ocfs2_free_alloc_context(ctxt.meta_ac); 2424 ocfs2_schedule_truncate_log_flush(osb, 1); 2425 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2426 return ret; 2427 } 2428 2429 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2430 struct buffer_head *di_bh, 2431 struct ocfs2_caching_info *ref_ci, 2432 struct buffer_head *ref_root_bh) 2433 { 2434 2435 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2436 struct ocfs2_xattr_header *header; 2437 int ret; 2438 struct ocfs2_xattr_value_buf vb = { 2439 .vb_bh = di_bh, 2440 .vb_access = ocfs2_journal_access_di, 2441 }; 2442 2443 header = (struct ocfs2_xattr_header *) 2444 ((void *)di + inode->i_sb->s_blocksize - 2445 le16_to_cpu(di->i_xattr_inline_size)); 2446 2447 ret = ocfs2_remove_value_outside(inode, &vb, header, 2448 ref_ci, ref_root_bh); 2449 2450 return ret; 2451 } 2452 2453 struct ocfs2_rm_xattr_bucket_para { 2454 struct ocfs2_caching_info *ref_ci; 2455 struct buffer_head *ref_root_bh; 2456 }; 2457 2458 static int ocfs2_xattr_block_remove(struct inode *inode, 2459 struct buffer_head *blk_bh, 2460 struct ocfs2_caching_info *ref_ci, 2461 struct buffer_head *ref_root_bh) 2462 { 2463 struct ocfs2_xattr_block 
*xb; 2464 int ret = 0; 2465 struct ocfs2_xattr_value_buf vb = { 2466 .vb_bh = blk_bh, 2467 .vb_access = ocfs2_journal_access_xb, 2468 }; 2469 struct ocfs2_rm_xattr_bucket_para args = { 2470 .ref_ci = ref_ci, 2471 .ref_root_bh = ref_root_bh, 2472 }; 2473 2474 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2475 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2476 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 2477 ret = ocfs2_remove_value_outside(inode, &vb, header, 2478 ref_ci, ref_root_bh); 2479 } else 2480 ret = ocfs2_iterate_xattr_index_block(inode, 2481 blk_bh, 2482 ocfs2_rm_xattr_cluster, 2483 &args); 2484 2485 return ret; 2486 } 2487 2488 static int ocfs2_xattr_free_block(struct inode *inode, 2489 u64 block, 2490 struct ocfs2_caching_info *ref_ci, 2491 struct buffer_head *ref_root_bh) 2492 { 2493 struct inode *xb_alloc_inode; 2494 struct buffer_head *xb_alloc_bh = NULL; 2495 struct buffer_head *blk_bh = NULL; 2496 struct ocfs2_xattr_block *xb; 2497 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2498 handle_t *handle; 2499 int ret = 0; 2500 u64 blk, bg_blkno; 2501 u16 bit; 2502 2503 ret = ocfs2_read_xattr_block(inode, block, &blk_bh); 2504 if (ret < 0) { 2505 mlog_errno(ret); 2506 goto out; 2507 } 2508 2509 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); 2510 if (ret < 0) { 2511 mlog_errno(ret); 2512 goto out; 2513 } 2514 2515 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2516 blk = le64_to_cpu(xb->xb_blkno); 2517 bit = le16_to_cpu(xb->xb_suballoc_bit); 2518 if (xb->xb_suballoc_loc) 2519 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); 2520 else 2521 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2522 2523 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2524 EXTENT_ALLOC_SYSTEM_INODE, 2525 le16_to_cpu(xb->xb_suballoc_slot)); 2526 if (!xb_alloc_inode) { 2527 ret = -ENOMEM; 2528 mlog_errno(ret); 2529 goto out; 2530 } 2531 inode_lock(xb_alloc_inode); 2532 2533 ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 
1); 2534 if (ret < 0) { 2535 mlog_errno(ret); 2536 goto out_mutex; 2537 } 2538 2539 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 2540 if (IS_ERR(handle)) { 2541 ret = PTR_ERR(handle); 2542 mlog_errno(ret); 2543 goto out_unlock; 2544 } 2545 2546 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, 2547 bit, bg_blkno, 1); 2548 if (ret < 0) 2549 mlog_errno(ret); 2550 2551 ocfs2_commit_trans(osb, handle); 2552 out_unlock: 2553 ocfs2_inode_unlock(xb_alloc_inode, 1); 2554 brelse(xb_alloc_bh); 2555 out_mutex: 2556 inode_unlock(xb_alloc_inode); 2557 iput(xb_alloc_inode); 2558 out: 2559 brelse(blk_bh); 2560 return ret; 2561 } 2562 2563 /* 2564 * ocfs2_xattr_remove() 2565 * 2566 * Free extended attribute resources associated with this inode. 2567 */ 2568 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) 2569 { 2570 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2571 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2572 struct ocfs2_refcount_tree *ref_tree = NULL; 2573 struct buffer_head *ref_root_bh = NULL; 2574 struct ocfs2_caching_info *ref_ci = NULL; 2575 handle_t *handle; 2576 int ret; 2577 2578 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2579 return 0; 2580 2581 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2582 return 0; 2583 2584 if (ocfs2_is_refcount_inode(inode)) { 2585 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), 2586 le64_to_cpu(di->i_refcount_loc), 2587 1, &ref_tree, &ref_root_bh); 2588 if (ret) { 2589 mlog_errno(ret); 2590 goto out; 2591 } 2592 ref_ci = &ref_tree->rf_ci; 2593 2594 } 2595 2596 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2597 ret = ocfs2_xattr_ibody_remove(inode, di_bh, 2598 ref_ci, ref_root_bh); 2599 if (ret < 0) { 2600 mlog_errno(ret); 2601 goto out; 2602 } 2603 } 2604 2605 if (di->i_xattr_loc) { 2606 ret = ocfs2_xattr_free_block(inode, 2607 le64_to_cpu(di->i_xattr_loc), 2608 ref_ci, ref_root_bh); 2609 if (ret < 0) { 2610 mlog_errno(ret); 2611 goto out; 2612 } 
2613 } 2614 2615 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 2616 OCFS2_INODE_UPDATE_CREDITS); 2617 if (IS_ERR(handle)) { 2618 ret = PTR_ERR(handle); 2619 mlog_errno(ret); 2620 goto out; 2621 } 2622 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 2623 OCFS2_JOURNAL_ACCESS_WRITE); 2624 if (ret) { 2625 mlog_errno(ret); 2626 goto out_commit; 2627 } 2628 2629 di->i_xattr_loc = 0; 2630 2631 spin_lock(&oi->ip_lock); 2632 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2633 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2634 spin_unlock(&oi->ip_lock); 2635 ocfs2_update_inode_fsync_trans(handle, inode, 0); 2636 2637 ocfs2_journal_dirty(handle, di_bh); 2638 out_commit: 2639 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2640 out: 2641 if (ref_tree) 2642 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); 2643 brelse(ref_root_bh); 2644 return ret; 2645 } 2646 2647 static int ocfs2_xattr_has_space_inline(struct inode *inode, 2648 struct ocfs2_dinode *di) 2649 { 2650 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2651 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; 2652 int free; 2653 2654 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) 2655 return 0; 2656 2657 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2658 struct ocfs2_inline_data *idata = &di->id2.i_data; 2659 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); 2660 } else if (ocfs2_inode_is_fast_symlink(inode)) { 2661 free = ocfs2_fast_symlink_chars(inode->i_sb) - 2662 le64_to_cpu(di->i_size); 2663 } else { 2664 struct ocfs2_extent_list *el = &di->id2.i_list; 2665 free = (le16_to_cpu(el->l_count) - 2666 le16_to_cpu(el->l_next_free_rec)) * 2667 sizeof(struct ocfs2_extent_rec); 2668 } 2669 if (free >= xattrsize) 2670 return 1; 2671 2672 return 0; 2673 } 2674 2675 /* 2676 * ocfs2_xattr_ibody_find() 2677 * 2678 * Find extended attribute in inode block and 2679 * fill search info into struct ocfs2_xattr_search. 
2680 */ 2681 static int ocfs2_xattr_ibody_find(struct inode *inode, 2682 int name_index, 2683 const char *name, 2684 struct ocfs2_xattr_search *xs) 2685 { 2686 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2687 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2688 int ret; 2689 int has_space = 0; 2690 2691 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2692 return 0; 2693 2694 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2695 down_read(&oi->ip_alloc_sem); 2696 has_space = ocfs2_xattr_has_space_inline(inode, di); 2697 up_read(&oi->ip_alloc_sem); 2698 if (!has_space) 2699 return 0; 2700 } 2701 2702 xs->xattr_bh = xs->inode_bh; 2703 xs->end = (void *)di + inode->i_sb->s_blocksize; 2704 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) 2705 xs->header = (struct ocfs2_xattr_header *) 2706 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 2707 else 2708 xs->header = (struct ocfs2_xattr_header *) 2709 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); 2710 xs->base = (void *)xs->header; 2711 xs->here = xs->header->xh_entries; 2712 2713 /* Find the named attribute. 
*/ 2714 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2715 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2716 if (ret && ret != -ENODATA) 2717 return ret; 2718 xs->not_found = ret; 2719 } 2720 2721 return 0; 2722 } 2723 2724 static int ocfs2_xattr_ibody_init(struct inode *inode, 2725 struct buffer_head *di_bh, 2726 struct ocfs2_xattr_set_ctxt *ctxt) 2727 { 2728 int ret; 2729 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2730 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2731 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2732 unsigned int xattrsize = osb->s_xattr_inline_size; 2733 2734 if (!ocfs2_xattr_has_space_inline(inode, di)) { 2735 ret = -ENOSPC; 2736 goto out; 2737 } 2738 2739 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, 2740 OCFS2_JOURNAL_ACCESS_WRITE); 2741 if (ret) { 2742 mlog_errno(ret); 2743 goto out; 2744 } 2745 2746 /* 2747 * Adjust extent record count or inline data size 2748 * to reserve space for extended attribute. 2749 */ 2750 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2751 struct ocfs2_inline_data *idata = &di->id2.i_data; 2752 le16_add_cpu(&idata->id_count, -xattrsize); 2753 } else if (!(ocfs2_inode_is_fast_symlink(inode))) { 2754 struct ocfs2_extent_list *el = &di->id2.i_list; 2755 le16_add_cpu(&el->l_count, -(xattrsize / 2756 sizeof(struct ocfs2_extent_rec))); 2757 } 2758 di->i_xattr_inline_size = cpu_to_le16(xattrsize); 2759 2760 spin_lock(&oi->ip_lock); 2761 oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; 2762 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2763 spin_unlock(&oi->ip_lock); 2764 2765 ocfs2_journal_dirty(ctxt->handle, di_bh); 2766 2767 out: 2768 return ret; 2769 } 2770 2771 /* 2772 * ocfs2_xattr_ibody_set() 2773 * 2774 * Set, replace or remove an extended attribute into inode block. 
2775 * 2776 */ 2777 static int ocfs2_xattr_ibody_set(struct inode *inode, 2778 struct ocfs2_xattr_info *xi, 2779 struct ocfs2_xattr_search *xs, 2780 struct ocfs2_xattr_set_ctxt *ctxt) 2781 { 2782 int ret; 2783 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2784 struct ocfs2_xa_loc loc; 2785 2786 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2787 return -ENOSPC; 2788 2789 down_write(&oi->ip_alloc_sem); 2790 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2791 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2792 if (ret) { 2793 if (ret != -ENOSPC) 2794 mlog_errno(ret); 2795 goto out; 2796 } 2797 } 2798 2799 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2800 xs->not_found ? NULL : xs->here); 2801 ret = ocfs2_xa_set(&loc, xi, ctxt); 2802 if (ret) { 2803 if (ret != -ENOSPC) 2804 mlog_errno(ret); 2805 goto out; 2806 } 2807 xs->here = loc.xl_entry; 2808 2809 out: 2810 up_write(&oi->ip_alloc_sem); 2811 2812 return ret; 2813 } 2814 2815 /* 2816 * ocfs2_xattr_block_find() 2817 * 2818 * Find extended attribute in external block and 2819 * fill search info into struct ocfs2_xattr_search. 
2820 */ 2821 static int ocfs2_xattr_block_find(struct inode *inode, 2822 int name_index, 2823 const char *name, 2824 struct ocfs2_xattr_search *xs) 2825 { 2826 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2827 struct buffer_head *blk_bh = NULL; 2828 struct ocfs2_xattr_block *xb; 2829 int ret = 0; 2830 2831 if (!di->i_xattr_loc) 2832 return ret; 2833 2834 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 2835 &blk_bh); 2836 if (ret < 0) { 2837 mlog_errno(ret); 2838 return ret; 2839 } 2840 2841 xs->xattr_bh = blk_bh; 2842 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2843 2844 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2845 xs->header = &xb->xb_attrs.xb_header; 2846 xs->base = (void *)xs->header; 2847 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; 2848 xs->here = xs->header->xh_entries; 2849 2850 ret = ocfs2_xattr_find_entry(name_index, name, xs); 2851 } else 2852 ret = ocfs2_xattr_index_block_find(inode, blk_bh, 2853 name_index, 2854 name, xs); 2855 2856 if (ret && ret != -ENODATA) { 2857 xs->xattr_bh = NULL; 2858 goto cleanup; 2859 } 2860 xs->not_found = ret; 2861 return 0; 2862 cleanup: 2863 brelse(blk_bh); 2864 2865 return ret; 2866 } 2867 2868 static int ocfs2_create_xattr_block(struct inode *inode, 2869 struct buffer_head *inode_bh, 2870 struct ocfs2_xattr_set_ctxt *ctxt, 2871 int indexed, 2872 struct buffer_head **ret_bh) 2873 { 2874 int ret; 2875 u16 suballoc_bit_start; 2876 u32 num_got; 2877 u64 suballoc_loc, first_blkno; 2878 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; 2879 struct buffer_head *new_bh = NULL; 2880 struct ocfs2_xattr_block *xblk; 2881 2882 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 2883 inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); 2884 if (ret < 0) { 2885 mlog_errno(ret); 2886 goto end; 2887 } 2888 2889 ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1, 2890 &suballoc_loc, &suballoc_bit_start, 2891 &num_got, &first_blkno); 2892 if 
(ret < 0) { 2893 mlog_errno(ret); 2894 goto end; 2895 } 2896 2897 new_bh = sb_getblk(inode->i_sb, first_blkno); 2898 if (!new_bh) { 2899 ret = -ENOMEM; 2900 mlog_errno(ret); 2901 goto end; 2902 } 2903 2904 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); 2905 2906 ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), 2907 new_bh, 2908 OCFS2_JOURNAL_ACCESS_CREATE); 2909 if (ret < 0) { 2910 mlog_errno(ret); 2911 goto end; 2912 } 2913 2914 /* Initialize ocfs2_xattr_block */ 2915 xblk = (struct ocfs2_xattr_block *)new_bh->b_data; 2916 memset(xblk, 0, inode->i_sb->s_blocksize); 2917 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2918 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2919 xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc); 2920 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2921 xblk->xb_fs_generation = 2922 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation); 2923 xblk->xb_blkno = cpu_to_le64(first_blkno); 2924 if (indexed) { 2925 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2926 xr->xt_clusters = cpu_to_le32(1); 2927 xr->xt_last_eb_blk = 0; 2928 xr->xt_list.l_tree_depth = 0; 2929 xr->xt_list.l_count = cpu_to_le16( 2930 ocfs2_xattr_recs_per_xb(inode->i_sb)); 2931 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 2932 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); 2933 } 2934 ocfs2_journal_dirty(ctxt->handle, new_bh); 2935 2936 /* Add it to the inode */ 2937 di->i_xattr_loc = cpu_to_le64(first_blkno); 2938 2939 spin_lock(&OCFS2_I(inode)->ip_lock); 2940 OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 2941 di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); 2942 spin_unlock(&OCFS2_I(inode)->ip_lock); 2943 2944 ocfs2_journal_dirty(ctxt->handle, inode_bh); 2945 2946 *ret_bh = new_bh; 2947 new_bh = NULL; 2948 2949 end: 2950 brelse(new_bh); 2951 return ret; 2952 } 2953 2954 /* 2955 * ocfs2_xattr_block_set() 2956 * 2957 * Set, replace or remove an extended attribute into 
external block. 2958 * 2959 */ 2960 static int ocfs2_xattr_block_set(struct inode *inode, 2961 struct ocfs2_xattr_info *xi, 2962 struct ocfs2_xattr_search *xs, 2963 struct ocfs2_xattr_set_ctxt *ctxt) 2964 { 2965 struct buffer_head *new_bh = NULL; 2966 struct ocfs2_xattr_block *xblk = NULL; 2967 int ret; 2968 struct ocfs2_xa_loc loc; 2969 2970 if (!xs->xattr_bh) { 2971 ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt, 2972 0, &new_bh); 2973 if (ret) { 2974 mlog_errno(ret); 2975 goto end; 2976 } 2977 2978 xs->xattr_bh = new_bh; 2979 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2980 xs->header = &xblk->xb_attrs.xb_header; 2981 xs->base = (void *)xs->header; 2982 xs->end = (void *)xblk + inode->i_sb->s_blocksize; 2983 xs->here = xs->header->xh_entries; 2984 } else 2985 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2986 2987 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { 2988 ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh, 2989 xs->not_found ? NULL : xs->here); 2990 2991 ret = ocfs2_xa_set(&loc, xi, ctxt); 2992 if (!ret) 2993 xs->here = loc.xl_entry; 2994 else if ((ret != -ENOSPC) || ctxt->set_abort) 2995 goto end; 2996 else { 2997 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); 2998 if (ret) 2999 goto end; 3000 } 3001 } 3002 3003 if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED) 3004 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt); 3005 3006 end: 3007 return ret; 3008 } 3009 3010 /* Check whether the new xattr can be inserted into the inode. 
*/ 3011 static int ocfs2_xattr_can_be_in_inode(struct inode *inode, 3012 struct ocfs2_xattr_info *xi, 3013 struct ocfs2_xattr_search *xs) 3014 { 3015 struct ocfs2_xattr_entry *last; 3016 int free, i; 3017 size_t min_offs = xs->end - xs->base; 3018 3019 if (!xs->header) 3020 return 0; 3021 3022 last = xs->header->xh_entries; 3023 3024 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 3025 size_t offs = le16_to_cpu(last->xe_name_offset); 3026 if (offs < min_offs) 3027 min_offs = offs; 3028 last += 1; 3029 } 3030 3031 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; 3032 if (free < 0) 3033 return 0; 3034 3035 BUG_ON(!xs->not_found); 3036 3037 if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi))) 3038 return 1; 3039 3040 return 0; 3041 } 3042 3043 static int ocfs2_calc_xattr_set_need(struct inode *inode, 3044 struct ocfs2_dinode *di, 3045 struct ocfs2_xattr_info *xi, 3046 struct ocfs2_xattr_search *xis, 3047 struct ocfs2_xattr_search *xbs, 3048 int *clusters_need, 3049 int *meta_need, 3050 int *credits_need) 3051 { 3052 int ret = 0, old_in_xb = 0; 3053 int clusters_add = 0, meta_add = 0, credits = 0; 3054 struct buffer_head *bh = NULL; 3055 struct ocfs2_xattr_block *xb = NULL; 3056 struct ocfs2_xattr_entry *xe = NULL; 3057 struct ocfs2_xattr_value_root *xv = NULL; 3058 char *base = NULL; 3059 int name_offset, name_len = 0; 3060 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3061 xi->xi_value_len); 3062 u64 value_size; 3063 3064 /* 3065 * Calculate the clusters we need to write. 3066 * No matter whether we replace an old one or add a new one, 3067 * we need this for writing. 
3068 */ 3069 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 3070 credits += new_clusters * 3071 ocfs2_clusters_to_blocks(inode->i_sb, 1); 3072 3073 if (xis->not_found && xbs->not_found) { 3074 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3075 3076 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3077 clusters_add += new_clusters; 3078 credits += ocfs2_calc_extend_credits(inode->i_sb, 3079 &def_xv.xv.xr_list); 3080 } 3081 3082 goto meta_guess; 3083 } 3084 3085 if (!xis->not_found) { 3086 xe = xis->here; 3087 name_offset = le16_to_cpu(xe->xe_name_offset); 3088 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3089 base = xis->base; 3090 credits += OCFS2_INODE_UPDATE_CREDITS; 3091 } else { 3092 int i, block_off = 0; 3093 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3094 xe = xbs->here; 3095 name_offset = le16_to_cpu(xe->xe_name_offset); 3096 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 3097 i = xbs->here - xbs->header->xh_entries; 3098 old_in_xb = 1; 3099 3100 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3101 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3102 bucket_xh(xbs->bucket), 3103 i, &block_off, 3104 &name_offset); 3105 base = bucket_block(xbs->bucket, block_off); 3106 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3107 } else { 3108 base = xbs->base; 3109 credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS; 3110 } 3111 } 3112 3113 /* 3114 * delete a xattr doesn't need metadata and cluster allocation. 3115 * so just calculate the credits and return. 3116 * 3117 * The credits for removing the value tree will be extended 3118 * by ocfs2_remove_extent itself. 3119 */ 3120 if (!xi->xi_value) { 3121 if (!ocfs2_xattr_is_local(xe)) 3122 credits += ocfs2_remove_extent_credits(inode->i_sb); 3123 3124 goto out; 3125 } 3126 3127 /* do cluster allocation guess first. 
*/ 3128 value_size = le64_to_cpu(xe->xe_value_size); 3129 3130 if (old_in_xb) { 3131 /* 3132 * In xattr set, we always try to set the xe in inode first, 3133 * so if it can be inserted into inode successfully, the old 3134 * one will be removed from the xattr block, and this xattr 3135 * will be inserted into inode as a new xattr in inode. 3136 */ 3137 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { 3138 clusters_add += new_clusters; 3139 credits += ocfs2_remove_extent_credits(inode->i_sb) + 3140 OCFS2_INODE_UPDATE_CREDITS; 3141 if (!ocfs2_xattr_is_local(xe)) 3142 credits += ocfs2_calc_extend_credits( 3143 inode->i_sb, 3144 &def_xv.xv.xr_list); 3145 goto out; 3146 } 3147 } 3148 3149 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3150 /* the new values will be stored outside. */ 3151 u32 old_clusters = 0; 3152 3153 if (!ocfs2_xattr_is_local(xe)) { 3154 old_clusters = ocfs2_clusters_for_bytes(inode->i_sb, 3155 value_size); 3156 xv = (struct ocfs2_xattr_value_root *) 3157 (base + name_offset + name_len); 3158 value_size = OCFS2_XATTR_ROOT_SIZE; 3159 } else 3160 xv = &def_xv.xv; 3161 3162 if (old_clusters >= new_clusters) { 3163 credits += ocfs2_remove_extent_credits(inode->i_sb); 3164 goto out; 3165 } else { 3166 meta_add += ocfs2_extend_meta_needed(&xv->xr_list); 3167 clusters_add += new_clusters - old_clusters; 3168 credits += ocfs2_calc_extend_credits(inode->i_sb, 3169 &xv->xr_list); 3170 if (value_size >= OCFS2_XATTR_ROOT_SIZE) 3171 goto out; 3172 } 3173 } else { 3174 /* 3175 * Now the new value will be stored inside. So if the new 3176 * value is smaller than the size of value root or the old 3177 * value, we don't need any allocation, otherwise we have 3178 * to guess metadata allocation. 3179 */ 3180 if ((ocfs2_xattr_is_local(xe) && 3181 (value_size >= xi->xi_value_len)) || 3182 (!ocfs2_xattr_is_local(xe) && 3183 OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len)) 3184 goto out; 3185 } 3186 3187 meta_guess: 3188 /* calculate metadata allocation. 
*/ 3189 if (di->i_xattr_loc) { 3190 if (!xbs->xattr_bh) { 3191 ret = ocfs2_read_xattr_block(inode, 3192 le64_to_cpu(di->i_xattr_loc), 3193 &bh); 3194 if (ret) { 3195 mlog_errno(ret); 3196 goto out; 3197 } 3198 3199 xb = (struct ocfs2_xattr_block *)bh->b_data; 3200 } else 3201 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 3202 3203 /* 3204 * If there is already an xattr tree, good, we can calculate 3205 * like other b-trees. Otherwise we may have the chance of 3206 * create a tree, the credit calculation is borrowed from 3207 * ocfs2_calc_extend_credits with root_el = NULL. And the 3208 * new tree will be cluster based, so no meta is needed. 3209 */ 3210 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 3211 struct ocfs2_extent_list *el = 3212 &xb->xb_attrs.xb_root.xt_list; 3213 meta_add += ocfs2_extend_meta_needed(el); 3214 credits += ocfs2_calc_extend_credits(inode->i_sb, 3215 el); 3216 } else 3217 credits += OCFS2_SUBALLOC_ALLOC + 1; 3218 3219 /* 3220 * This cluster will be used either for new bucket or for 3221 * new xattr block. 3222 * If the cluster size is the same as the bucket size, one 3223 * more is needed since we may need to extend the bucket 3224 * also. 
3225 */ 3226 clusters_add += 1; 3227 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3228 if (OCFS2_XATTR_BUCKET_SIZE == 3229 OCFS2_SB(inode->i_sb)->s_clustersize) { 3230 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3231 clusters_add += 1; 3232 } 3233 } else { 3234 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 3235 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 3236 struct ocfs2_extent_list *el = &def_xv.xv.xr_list; 3237 meta_add += ocfs2_extend_meta_needed(el); 3238 credits += ocfs2_calc_extend_credits(inode->i_sb, 3239 el); 3240 } else { 3241 meta_add += 1; 3242 } 3243 } 3244 out: 3245 if (clusters_need) 3246 *clusters_need = clusters_add; 3247 if (meta_need) 3248 *meta_need = meta_add; 3249 if (credits_need) 3250 *credits_need = credits; 3251 brelse(bh); 3252 return ret; 3253 } 3254 3255 static int ocfs2_init_xattr_set_ctxt(struct inode *inode, 3256 struct ocfs2_dinode *di, 3257 struct ocfs2_xattr_info *xi, 3258 struct ocfs2_xattr_search *xis, 3259 struct ocfs2_xattr_search *xbs, 3260 struct ocfs2_xattr_set_ctxt *ctxt, 3261 int extra_meta, 3262 int *credits) 3263 { 3264 int clusters_add, meta_add, ret; 3265 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3266 3267 memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt)); 3268 3269 ocfs2_init_dealloc_ctxt(&ctxt->dealloc); 3270 3271 ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, 3272 &clusters_add, &meta_add, credits); 3273 if (ret) { 3274 mlog_errno(ret); 3275 return ret; 3276 } 3277 3278 meta_add += extra_meta; 3279 trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add, 3280 clusters_add, *credits); 3281 3282 if (meta_add) { 3283 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, 3284 &ctxt->meta_ac); 3285 if (ret) { 3286 mlog_errno(ret); 3287 goto out; 3288 } 3289 } 3290 3291 if (clusters_add) { 3292 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac); 3293 if (ret) 3294 mlog_errno(ret); 3295 } 3296 out: 3297 if (ret) { 3298 if (ctxt->meta_ac) { 3299 
ocfs2_free_alloc_context(ctxt->meta_ac); 3300 ctxt->meta_ac = NULL; 3301 } 3302 3303 /* 3304 * We cannot have an error and a non null ctxt->data_ac. 3305 */ 3306 } 3307 3308 return ret; 3309 } 3310 3311 static int __ocfs2_xattr_set_handle(struct inode *inode, 3312 struct ocfs2_dinode *di, 3313 struct ocfs2_xattr_info *xi, 3314 struct ocfs2_xattr_search *xis, 3315 struct ocfs2_xattr_search *xbs, 3316 struct ocfs2_xattr_set_ctxt *ctxt) 3317 { 3318 int ret = 0, credits, old_found; 3319 3320 if (!xi->xi_value) { 3321 /* Remove existing extended attribute */ 3322 if (!xis->not_found) 3323 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3324 else if (!xbs->not_found) 3325 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3326 } else { 3327 /* We always try to set extended attribute into inode first*/ 3328 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); 3329 if (!ret && !xbs->not_found) { 3330 /* 3331 * If succeed and that extended attribute existing in 3332 * external block, then we will remove it. 
3333 */ 3334 xi->xi_value = NULL; 3335 xi->xi_value_len = 0; 3336 3337 old_found = xis->not_found; 3338 xis->not_found = -ENODATA; 3339 ret = ocfs2_calc_xattr_set_need(inode, 3340 di, 3341 xi, 3342 xis, 3343 xbs, 3344 NULL, 3345 NULL, 3346 &credits); 3347 xis->not_found = old_found; 3348 if (ret) { 3349 mlog_errno(ret); 3350 goto out; 3351 } 3352 3353 ret = ocfs2_extend_trans(ctxt->handle, credits); 3354 if (ret) { 3355 mlog_errno(ret); 3356 goto out; 3357 } 3358 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3359 } else if ((ret == -ENOSPC) && !ctxt->set_abort) { 3360 if (di->i_xattr_loc && !xbs->xattr_bh) { 3361 ret = ocfs2_xattr_block_find(inode, 3362 xi->xi_name_index, 3363 xi->xi_name, xbs); 3364 if (ret) 3365 goto out; 3366 3367 old_found = xis->not_found; 3368 xis->not_found = -ENODATA; 3369 ret = ocfs2_calc_xattr_set_need(inode, 3370 di, 3371 xi, 3372 xis, 3373 xbs, 3374 NULL, 3375 NULL, 3376 &credits); 3377 xis->not_found = old_found; 3378 if (ret) { 3379 mlog_errno(ret); 3380 goto out; 3381 } 3382 3383 ret = ocfs2_extend_trans(ctxt->handle, credits); 3384 if (ret) { 3385 mlog_errno(ret); 3386 goto out; 3387 } 3388 } 3389 /* 3390 * If no space in inode, we will set extended attribute 3391 * into external block. 3392 */ 3393 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3394 if (ret) 3395 goto out; 3396 if (!xis->not_found) { 3397 /* 3398 * If succeed and that extended attribute 3399 * existing in inode, we will remove it. 3400 */ 3401 xi->xi_value = NULL; 3402 xi->xi_value_len = 0; 3403 xbs->not_found = -ENODATA; 3404 ret = ocfs2_calc_xattr_set_need(inode, 3405 di, 3406 xi, 3407 xis, 3408 xbs, 3409 NULL, 3410 NULL, 3411 &credits); 3412 if (ret) { 3413 mlog_errno(ret); 3414 goto out; 3415 } 3416 3417 ret = ocfs2_extend_trans(ctxt->handle, credits); 3418 if (ret) { 3419 mlog_errno(ret); 3420 goto out; 3421 } 3422 ret = ocfs2_xattr_ibody_set(inode, xi, 3423 xis, ctxt); 3424 } 3425 } 3426 } 3427 3428 if (!ret) { 3429 /* Update inode ctime. 
*/ 3430 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), 3431 xis->inode_bh, 3432 OCFS2_JOURNAL_ACCESS_WRITE); 3433 if (ret) { 3434 mlog_errno(ret); 3435 goto out; 3436 } 3437 3438 inode->i_ctime = current_time(inode); 3439 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 3440 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 3441 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh); 3442 } 3443 out: 3444 return ret; 3445 } 3446 3447 /* 3448 * This function only called duing creating inode 3449 * for init security/acl xattrs of the new inode. 3450 * All transanction credits have been reserved in mknod. 3451 */ 3452 int ocfs2_xattr_set_handle(handle_t *handle, 3453 struct inode *inode, 3454 struct buffer_head *di_bh, 3455 int name_index, 3456 const char *name, 3457 const void *value, 3458 size_t value_len, 3459 int flags, 3460 struct ocfs2_alloc_context *meta_ac, 3461 struct ocfs2_alloc_context *data_ac) 3462 { 3463 struct ocfs2_dinode *di; 3464 int ret; 3465 3466 struct ocfs2_xattr_info xi = { 3467 .xi_name_index = name_index, 3468 .xi_name = name, 3469 .xi_name_len = strlen(name), 3470 .xi_value = value, 3471 .xi_value_len = value_len, 3472 }; 3473 3474 struct ocfs2_xattr_search xis = { 3475 .not_found = -ENODATA, 3476 }; 3477 3478 struct ocfs2_xattr_search xbs = { 3479 .not_found = -ENODATA, 3480 }; 3481 3482 struct ocfs2_xattr_set_ctxt ctxt = { 3483 .handle = handle, 3484 .meta_ac = meta_ac, 3485 .data_ac = data_ac, 3486 }; 3487 3488 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3489 return -EOPNOTSUPP; 3490 3491 /* 3492 * In extreme situation, may need xattr bucket when 3493 * block size is too small. And we have already reserved 3494 * the credits for bucket in mknod. 
3495 */ 3496 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) { 3497 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3498 if (!xbs.bucket) { 3499 mlog_errno(-ENOMEM); 3500 return -ENOMEM; 3501 } 3502 } 3503 3504 xis.inode_bh = xbs.inode_bh = di_bh; 3505 di = (struct ocfs2_dinode *)di_bh->b_data; 3506 3507 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3508 3509 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3510 if (ret) 3511 goto cleanup; 3512 if (xis.not_found) { 3513 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3514 if (ret) 3515 goto cleanup; 3516 } 3517 3518 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3519 3520 cleanup: 3521 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3522 brelse(xbs.xattr_bh); 3523 ocfs2_xattr_bucket_free(xbs.bucket); 3524 3525 return ret; 3526 } 3527 3528 /* 3529 * ocfs2_xattr_set() 3530 * 3531 * Set, replace or remove an extended attribute for this inode. 3532 * value is NULL to remove an existing extended attribute, else either 3533 * create or replace an extended attribute. 
3534 */ 3535 int ocfs2_xattr_set(struct inode *inode, 3536 int name_index, 3537 const char *name, 3538 const void *value, 3539 size_t value_len, 3540 int flags) 3541 { 3542 struct buffer_head *di_bh = NULL; 3543 struct ocfs2_dinode *di; 3544 int ret, credits, had_lock, ref_meta = 0, ref_credits = 0; 3545 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3546 struct inode *tl_inode = osb->osb_tl_inode; 3547 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; 3548 struct ocfs2_refcount_tree *ref_tree = NULL; 3549 struct ocfs2_lock_holder oh; 3550 3551 struct ocfs2_xattr_info xi = { 3552 .xi_name_index = name_index, 3553 .xi_name = name, 3554 .xi_name_len = strlen(name), 3555 .xi_value = value, 3556 .xi_value_len = value_len, 3557 }; 3558 3559 struct ocfs2_xattr_search xis = { 3560 .not_found = -ENODATA, 3561 }; 3562 3563 struct ocfs2_xattr_search xbs = { 3564 .not_found = -ENODATA, 3565 }; 3566 3567 if (!ocfs2_supports_xattr(osb)) 3568 return -EOPNOTSUPP; 3569 3570 /* 3571 * Only xbs will be used on indexed trees. xis doesn't need a 3572 * bucket. 3573 */ 3574 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3575 if (!xbs.bucket) { 3576 mlog_errno(-ENOMEM); 3577 return -ENOMEM; 3578 } 3579 3580 had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh); 3581 if (had_lock < 0) { 3582 ret = had_lock; 3583 mlog_errno(ret); 3584 goto cleanup_nolock; 3585 } 3586 xis.inode_bh = xbs.inode_bh = di_bh; 3587 di = (struct ocfs2_dinode *)di_bh->b_data; 3588 3589 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3590 /* 3591 * Scan inode and external block to find the same name 3592 * extended attribute and collect search information. 
3593 */ 3594 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3595 if (ret) 3596 goto cleanup; 3597 if (xis.not_found) { 3598 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3599 if (ret) 3600 goto cleanup; 3601 } 3602 3603 if (xis.not_found && xbs.not_found) { 3604 ret = -ENODATA; 3605 if (flags & XATTR_REPLACE) 3606 goto cleanup; 3607 ret = 0; 3608 if (!value) 3609 goto cleanup; 3610 } else { 3611 ret = -EEXIST; 3612 if (flags & XATTR_CREATE) 3613 goto cleanup; 3614 } 3615 3616 /* Check whether the value is refcounted and do some preparation. */ 3617 if (ocfs2_is_refcount_inode(inode) && 3618 (!xis.not_found || !xbs.not_found)) { 3619 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3620 &xis, &xbs, &ref_tree, 3621 &ref_meta, &ref_credits); 3622 if (ret) { 3623 mlog_errno(ret); 3624 goto cleanup; 3625 } 3626 } 3627 3628 inode_lock(tl_inode); 3629 3630 if (ocfs2_truncate_log_needs_flush(osb)) { 3631 ret = __ocfs2_flush_truncate_log(osb); 3632 if (ret < 0) { 3633 inode_unlock(tl_inode); 3634 mlog_errno(ret); 3635 goto cleanup; 3636 } 3637 } 3638 inode_unlock(tl_inode); 3639 3640 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3641 &xbs, &ctxt, ref_meta, &credits); 3642 if (ret) { 3643 mlog_errno(ret); 3644 goto cleanup; 3645 } 3646 3647 /* we need to update inode's ctime field, so add credit for it. 
*/ 3648 credits += OCFS2_INODE_UPDATE_CREDITS; 3649 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 3650 if (IS_ERR(ctxt.handle)) { 3651 ret = PTR_ERR(ctxt.handle); 3652 mlog_errno(ret); 3653 goto out_free_ac; 3654 } 3655 3656 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3657 ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0); 3658 3659 ocfs2_commit_trans(osb, ctxt.handle); 3660 3661 out_free_ac: 3662 if (ctxt.data_ac) 3663 ocfs2_free_alloc_context(ctxt.data_ac); 3664 if (ctxt.meta_ac) 3665 ocfs2_free_alloc_context(ctxt.meta_ac); 3666 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3667 ocfs2_schedule_truncate_log_flush(osb, 1); 3668 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3669 3670 cleanup: 3671 if (ref_tree) 3672 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3673 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3674 if (!value && !ret) { 3675 ret = ocfs2_try_remove_refcount_tree(inode, di_bh); 3676 if (ret) 3677 mlog_errno(ret); 3678 } 3679 ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); 3680 cleanup_nolock: 3681 brelse(di_bh); 3682 brelse(xbs.xattr_bh); 3683 ocfs2_xattr_bucket_free(xbs.bucket); 3684 3685 return ret; 3686 } 3687 3688 /* 3689 * Find the xattr extent rec which may contains name_hash. 3690 * e_cpos will be the first name hash of the xattr rec. 3691 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. 
3692 */ 3693 static int ocfs2_xattr_get_rec(struct inode *inode, 3694 u32 name_hash, 3695 u64 *p_blkno, 3696 u32 *e_cpos, 3697 u32 *num_clusters, 3698 struct ocfs2_extent_list *el) 3699 { 3700 int ret = 0, i; 3701 struct buffer_head *eb_bh = NULL; 3702 struct ocfs2_extent_block *eb; 3703 struct ocfs2_extent_rec *rec = NULL; 3704 u64 e_blkno = 0; 3705 3706 if (el->l_tree_depth) { 3707 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3708 &eb_bh); 3709 if (ret) { 3710 mlog_errno(ret); 3711 goto out; 3712 } 3713 3714 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3715 el = &eb->h_list; 3716 3717 if (el->l_tree_depth) { 3718 ret = ocfs2_error(inode->i_sb, 3719 "Inode %lu has non zero tree depth in xattr tree block %llu\n", 3720 inode->i_ino, 3721 (unsigned long long)eb_bh->b_blocknr); 3722 goto out; 3723 } 3724 } 3725 3726 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3727 rec = &el->l_recs[i]; 3728 3729 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3730 e_blkno = le64_to_cpu(rec->e_blkno); 3731 break; 3732 } 3733 } 3734 3735 if (!e_blkno) { 3736 ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n", 3737 inode->i_ino, 3738 le32_to_cpu(rec->e_cpos), 3739 ocfs2_rec_clusters(el, rec)); 3740 goto out; 3741 } 3742 3743 *p_blkno = le64_to_cpu(rec->e_blkno); 3744 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3745 if (e_cpos) 3746 *e_cpos = le32_to_cpu(rec->e_cpos); 3747 out: 3748 brelse(eb_bh); 3749 return ret; 3750 } 3751 3752 typedef int (xattr_bucket_func)(struct inode *inode, 3753 struct ocfs2_xattr_bucket *bucket, 3754 void *para); 3755 3756 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3757 struct ocfs2_xattr_bucket *bucket, 3758 int name_index, 3759 const char *name, 3760 u32 name_hash, 3761 u16 *xe_index, 3762 int *found) 3763 { 3764 int i, ret = 0, cmp = 1, block_off, new_offset; 3765 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3766 size_t name_len = strlen(name); 3767 struct 
ocfs2_xattr_entry *xe = NULL; 3768 char *xe_name; 3769 3770 /* 3771 * We don't use binary search in the bucket because there 3772 * may be multiple entries with the same name hash. 3773 */ 3774 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3775 xe = &xh->xh_entries[i]; 3776 3777 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3778 continue; 3779 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3780 break; 3781 3782 cmp = name_index - ocfs2_xattr_get_type(xe); 3783 if (!cmp) 3784 cmp = name_len - xe->xe_name_len; 3785 if (cmp) 3786 continue; 3787 3788 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3789 xh, 3790 i, 3791 &block_off, 3792 &new_offset); 3793 if (ret) { 3794 mlog_errno(ret); 3795 break; 3796 } 3797 3798 3799 xe_name = bucket_block(bucket, block_off) + new_offset; 3800 if (!memcmp(name, xe_name, name_len)) { 3801 *xe_index = i; 3802 *found = 1; 3803 ret = 0; 3804 break; 3805 } 3806 } 3807 3808 return ret; 3809 } 3810 3811 /* 3812 * Find the specified xattr entry in a series of buckets. 3813 * This series start from p_blkno and last for num_clusters. 3814 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3815 * the num of the valid buckets. 3816 * 3817 * Return the buffer_head this xattr should reside in. And if the xattr's 3818 * hash is in the gap of 2 buckets, return the lower bucket. 
3819 */ 3820 static int ocfs2_xattr_bucket_find(struct inode *inode, 3821 int name_index, 3822 const char *name, 3823 u32 name_hash, 3824 u64 p_blkno, 3825 u32 first_hash, 3826 u32 num_clusters, 3827 struct ocfs2_xattr_search *xs) 3828 { 3829 int ret, found = 0; 3830 struct ocfs2_xattr_header *xh = NULL; 3831 struct ocfs2_xattr_entry *xe = NULL; 3832 u16 index = 0; 3833 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3834 int low_bucket = 0, bucket, high_bucket; 3835 struct ocfs2_xattr_bucket *search; 3836 u32 last_hash; 3837 u64 blkno, lower_blkno = 0; 3838 3839 search = ocfs2_xattr_bucket_new(inode); 3840 if (!search) { 3841 ret = -ENOMEM; 3842 mlog_errno(ret); 3843 goto out; 3844 } 3845 3846 ret = ocfs2_read_xattr_bucket(search, p_blkno); 3847 if (ret) { 3848 mlog_errno(ret); 3849 goto out; 3850 } 3851 3852 xh = bucket_xh(search); 3853 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 3854 while (low_bucket <= high_bucket) { 3855 ocfs2_xattr_bucket_relse(search); 3856 3857 bucket = (low_bucket + high_bucket) / 2; 3858 blkno = p_blkno + bucket * blk_per_bucket; 3859 ret = ocfs2_read_xattr_bucket(search, blkno); 3860 if (ret) { 3861 mlog_errno(ret); 3862 goto out; 3863 } 3864 3865 xh = bucket_xh(search); 3866 xe = &xh->xh_entries[0]; 3867 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3868 high_bucket = bucket - 1; 3869 continue; 3870 } 3871 3872 /* 3873 * Check whether the hash of the last entry in our 3874 * bucket is larger than the search one. for an empty 3875 * bucket, the last one is also the first one. 3876 */ 3877 if (xh->xh_count) 3878 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; 3879 3880 last_hash = le32_to_cpu(xe->xe_name_hash); 3881 3882 /* record lower_blkno which may be the insert place. */ 3883 lower_blkno = blkno; 3884 3885 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3886 low_bucket = bucket + 1; 3887 continue; 3888 } 3889 3890 /* the searched xattr should reside in this bucket if exists. 
*/ 3891 ret = ocfs2_find_xe_in_bucket(inode, search, 3892 name_index, name, name_hash, 3893 &index, &found); 3894 if (ret) { 3895 mlog_errno(ret); 3896 goto out; 3897 } 3898 break; 3899 } 3900 3901 /* 3902 * Record the bucket we have found. 3903 * When the xattr's hash value is in the gap of 2 buckets, we will 3904 * always set it to the previous bucket. 3905 */ 3906 if (!lower_blkno) 3907 lower_blkno = p_blkno; 3908 3909 /* This should be in cache - we just read it during the search */ 3910 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); 3911 if (ret) { 3912 mlog_errno(ret); 3913 goto out; 3914 } 3915 3916 xs->header = bucket_xh(xs->bucket); 3917 xs->base = bucket_block(xs->bucket, 0); 3918 xs->end = xs->base + inode->i_sb->s_blocksize; 3919 3920 if (found) { 3921 xs->here = &xs->header->xh_entries[index]; 3922 trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno, 3923 name, name_index, name_hash, 3924 (unsigned long long)bucket_blkno(xs->bucket), 3925 index); 3926 } else 3927 ret = -ENODATA; 3928 3929 out: 3930 ocfs2_xattr_bucket_free(search); 3931 return ret; 3932 } 3933 3934 static int ocfs2_xattr_index_block_find(struct inode *inode, 3935 struct buffer_head *root_bh, 3936 int name_index, 3937 const char *name, 3938 struct ocfs2_xattr_search *xs) 3939 { 3940 int ret; 3941 struct ocfs2_xattr_block *xb = 3942 (struct ocfs2_xattr_block *)root_bh->b_data; 3943 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 3944 struct ocfs2_extent_list *el = &xb_root->xt_list; 3945 u64 p_blkno = 0; 3946 u32 first_hash, num_clusters = 0; 3947 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 3948 3949 if (le16_to_cpu(el->l_next_free_rec) == 0) 3950 return -ENODATA; 3951 3952 trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno, 3953 name, name_index, name_hash, 3954 (unsigned long long)root_bh->b_blocknr, 3955 -1); 3956 3957 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3958 &num_clusters, el); 3959 if (ret) { 
3960 mlog_errno(ret); 3961 goto out; 3962 } 3963 3964 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 3965 3966 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno, 3967 name, name_index, first_hash, 3968 (unsigned long long)p_blkno, 3969 num_clusters); 3970 3971 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3972 p_blkno, first_hash, num_clusters, xs); 3973 3974 out: 3975 return ret; 3976 } 3977 3978 static int ocfs2_iterate_xattr_buckets(struct inode *inode, 3979 u64 blkno, 3980 u32 clusters, 3981 xattr_bucket_func *func, 3982 void *para) 3983 { 3984 int i, ret = 0; 3985 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3986 u32 num_buckets = clusters * bpc; 3987 struct ocfs2_xattr_bucket *bucket; 3988 3989 bucket = ocfs2_xattr_bucket_new(inode); 3990 if (!bucket) { 3991 mlog_errno(-ENOMEM); 3992 return -ENOMEM; 3993 } 3994 3995 trace_ocfs2_iterate_xattr_buckets( 3996 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3997 (unsigned long long)blkno, clusters); 3998 3999 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 4000 ret = ocfs2_read_xattr_bucket(bucket, blkno); 4001 if (ret) { 4002 mlog_errno(ret); 4003 break; 4004 } 4005 4006 /* 4007 * The real bucket num in this series of blocks is stored 4008 * in the 1st bucket. 
4009 */ 4010 if (i == 0) 4011 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 4012 4013 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno, 4014 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 4015 if (func) { 4016 ret = func(inode, bucket, para); 4017 if (ret && ret != -ERANGE) 4018 mlog_errno(ret); 4019 /* Fall through to bucket_relse() */ 4020 } 4021 4022 ocfs2_xattr_bucket_relse(bucket); 4023 if (ret) 4024 break; 4025 } 4026 4027 ocfs2_xattr_bucket_free(bucket); 4028 return ret; 4029 } 4030 4031 struct ocfs2_xattr_tree_list { 4032 char *buffer; 4033 size_t buffer_size; 4034 size_t result; 4035 }; 4036 4037 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 4038 struct ocfs2_xattr_header *xh, 4039 int index, 4040 int *block_off, 4041 int *new_offset) 4042 { 4043 u16 name_offset; 4044 4045 if (index < 0 || index >= le16_to_cpu(xh->xh_count)) 4046 return -EINVAL; 4047 4048 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 4049 4050 *block_off = name_offset >> sb->s_blocksize_bits; 4051 *new_offset = name_offset % sb->s_blocksize; 4052 4053 return 0; 4054 } 4055 4056 static int ocfs2_list_xattr_bucket(struct inode *inode, 4057 struct ocfs2_xattr_bucket *bucket, 4058 void *para) 4059 { 4060 int ret = 0, type; 4061 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; 4062 int i, block_off, new_offset; 4063 const char *name; 4064 4065 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { 4066 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; 4067 type = ocfs2_xattr_get_type(entry); 4068 4069 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 4070 bucket_xh(bucket), 4071 i, 4072 &block_off, 4073 &new_offset); 4074 if (ret) 4075 break; 4076 4077 name = (const char *)bucket_block(bucket, block_off) + 4078 new_offset; 4079 ret = ocfs2_xattr_list_entry(inode->i_sb, 4080 xl->buffer, 4081 xl->buffer_size, 4082 &xl->result, 4083 type, name, 4084 
entry->xe_name_len); 4085 if (ret) 4086 break; 4087 } 4088 4089 return ret; 4090 } 4091 4092 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 4093 struct buffer_head *blk_bh, 4094 xattr_tree_rec_func *rec_func, 4095 void *para) 4096 { 4097 struct ocfs2_xattr_block *xb = 4098 (struct ocfs2_xattr_block *)blk_bh->b_data; 4099 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; 4100 int ret = 0; 4101 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 4102 u64 p_blkno = 0; 4103 4104 if (!el->l_next_free_rec || !rec_func) 4105 return 0; 4106 4107 while (name_hash > 0) { 4108 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 4109 &e_cpos, &num_clusters, el); 4110 if (ret) { 4111 mlog_errno(ret); 4112 break; 4113 } 4114 4115 ret = rec_func(inode, blk_bh, p_blkno, e_cpos, 4116 num_clusters, para); 4117 if (ret) { 4118 if (ret != -ERANGE) 4119 mlog_errno(ret); 4120 break; 4121 } 4122 4123 if (e_cpos == 0) 4124 break; 4125 4126 name_hash = e_cpos - 1; 4127 } 4128 4129 return ret; 4130 4131 } 4132 4133 static int ocfs2_list_xattr_tree_rec(struct inode *inode, 4134 struct buffer_head *root_bh, 4135 u64 blkno, u32 cpos, u32 len, void *para) 4136 { 4137 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 4138 ocfs2_list_xattr_bucket, para); 4139 } 4140 4141 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 4142 struct buffer_head *blk_bh, 4143 char *buffer, 4144 size_t buffer_size) 4145 { 4146 int ret; 4147 struct ocfs2_xattr_tree_list xl = { 4148 .buffer = buffer, 4149 .buffer_size = buffer_size, 4150 .result = 0, 4151 }; 4152 4153 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 4154 ocfs2_list_xattr_tree_rec, &xl); 4155 if (ret) { 4156 mlog_errno(ret); 4157 goto out; 4158 } 4159 4160 ret = xl.result; 4161 out: 4162 return ret; 4163 } 4164 4165 static int cmp_xe(const void *a, const void *b) 4166 { 4167 const struct ocfs2_xattr_entry *l = a, *r = b; 4168 u32 l_hash = le32_to_cpu(l->xe_name_hash); 4169 u32 r_hash = 
le32_to_cpu(r->xe_name_hash); 4170 4171 if (l_hash > r_hash) 4172 return 1; 4173 if (l_hash < r_hash) 4174 return -1; 4175 return 0; 4176 } 4177 4178 static void swap_xe(void *a, void *b, int size) 4179 { 4180 struct ocfs2_xattr_entry *l = a, *r = b, tmp; 4181 4182 tmp = *l; 4183 memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); 4184 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); 4185 } 4186 4187 /* 4188 * When the ocfs2_xattr_block is filled up, new bucket will be created 4189 * and all the xattr entries will be moved to the new bucket. 4190 * The header goes at the start of the bucket, and the names+values are 4191 * filled from the end. This is why *target starts as the last buffer. 4192 * Note: we need to sort the entries since they are not saved in order 4193 * in the ocfs2_xattr_block. 4194 */ 4195 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 4196 struct buffer_head *xb_bh, 4197 struct ocfs2_xattr_bucket *bucket) 4198 { 4199 int i, blocksize = inode->i_sb->s_blocksize; 4200 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4201 u16 offset, size, off_change; 4202 struct ocfs2_xattr_entry *xe; 4203 struct ocfs2_xattr_block *xb = 4204 (struct ocfs2_xattr_block *)xb_bh->b_data; 4205 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 4206 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 4207 u16 count = le16_to_cpu(xb_xh->xh_count); 4208 char *src = xb_bh->b_data; 4209 char *target = bucket_block(bucket, blks - 1); 4210 4211 trace_ocfs2_cp_xattr_block_to_bucket_begin( 4212 (unsigned long long)xb_bh->b_blocknr, 4213 (unsigned long long)bucket_blkno(bucket)); 4214 4215 for (i = 0; i < blks; i++) 4216 memset(bucket_block(bucket, i), 0, blocksize); 4217 4218 /* 4219 * Since the xe_name_offset is based on ocfs2_xattr_header, 4220 * there is a offset change corresponding to the change of 4221 * ocfs2_xattr_header's position. 
4222 */ 4223 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4224 xe = &xb_xh->xh_entries[count - 1]; 4225 offset = le16_to_cpu(xe->xe_name_offset) + off_change; 4226 size = blocksize - offset; 4227 4228 /* copy all the names and values. */ 4229 memcpy(target + offset, src + offset, size); 4230 4231 /* Init new header now. */ 4232 xh->xh_count = xb_xh->xh_count; 4233 xh->xh_num_buckets = cpu_to_le16(1); 4234 xh->xh_name_value_len = cpu_to_le16(size); 4235 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 4236 4237 /* copy all the entries. */ 4238 target = bucket_block(bucket, 0); 4239 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 4240 size = count * sizeof(struct ocfs2_xattr_entry); 4241 memcpy(target + offset, (char *)xb_xh + offset, size); 4242 4243 /* Change the xe offset for all the xe because of the move. */ 4244 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + 4245 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4246 for (i = 0; i < count; i++) 4247 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4248 4249 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); 4250 4251 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4252 cmp_xe, swap_xe); 4253 } 4254 4255 /* 4256 * After we move xattr from block to index btree, we have to 4257 * update ocfs2_xattr_search to the new xe and base. 4258 * 4259 * When the entry is in xattr block, xattr_bh indicates the storage place. 4260 * While if the entry is in index b-tree, "bucket" indicates the 4261 * real place of the xattr. 
4262 */ 4263 static void ocfs2_xattr_update_xattr_search(struct inode *inode, 4264 struct ocfs2_xattr_search *xs, 4265 struct buffer_head *old_bh) 4266 { 4267 char *buf = old_bh->b_data; 4268 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 4269 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 4270 int i; 4271 4272 xs->header = bucket_xh(xs->bucket); 4273 xs->base = bucket_block(xs->bucket, 0); 4274 xs->end = xs->base + inode->i_sb->s_blocksize; 4275 4276 if (xs->not_found) 4277 return; 4278 4279 i = xs->here - old_xh->xh_entries; 4280 xs->here = &xs->header->xh_entries[i]; 4281 } 4282 4283 static int ocfs2_xattr_create_index_block(struct inode *inode, 4284 struct ocfs2_xattr_search *xs, 4285 struct ocfs2_xattr_set_ctxt *ctxt) 4286 { 4287 int ret; 4288 u32 bit_off, len; 4289 u64 blkno; 4290 handle_t *handle = ctxt->handle; 4291 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4292 struct buffer_head *xb_bh = xs->xattr_bh; 4293 struct ocfs2_xattr_block *xb = 4294 (struct ocfs2_xattr_block *)xb_bh->b_data; 4295 struct ocfs2_xattr_tree_root *xr; 4296 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4297 4298 trace_ocfs2_xattr_create_index_block_begin( 4299 (unsigned long long)xb_bh->b_blocknr); 4300 4301 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4302 BUG_ON(!xs->bucket); 4303 4304 /* 4305 * XXX: 4306 * We can use this lock for now, and maybe move to a dedicated mutex 4307 * if performance becomes a problem later. 
4308 */ 4309 down_write(&oi->ip_alloc_sem); 4310 4311 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, 4312 OCFS2_JOURNAL_ACCESS_WRITE); 4313 if (ret) { 4314 mlog_errno(ret); 4315 goto out; 4316 } 4317 4318 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 4319 1, 1, &bit_off, &len); 4320 if (ret) { 4321 mlog_errno(ret); 4322 goto out; 4323 } 4324 4325 /* 4326 * The bucket may spread in many blocks, and 4327 * we will only touch the 1st block and the last block 4328 * in the whole bucket(one for entry and one for data). 4329 */ 4330 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4331 4332 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); 4333 4334 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1); 4335 if (ret) { 4336 mlog_errno(ret); 4337 goto out; 4338 } 4339 4340 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 4341 OCFS2_JOURNAL_ACCESS_CREATE); 4342 if (ret) { 4343 mlog_errno(ret); 4344 goto out; 4345 } 4346 4347 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); 4348 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 4349 4350 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); 4351 4352 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 4353 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 4354 offsetof(struct ocfs2_xattr_block, xb_attrs)); 4355 4356 xr = &xb->xb_attrs.xb_root; 4357 xr->xt_clusters = cpu_to_le32(1); 4358 xr->xt_last_eb_blk = 0; 4359 xr->xt_list.l_tree_depth = 0; 4360 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); 4361 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 4362 4363 xr->xt_list.l_recs[0].e_cpos = 0; 4364 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 4365 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 4366 4367 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 4368 4369 ocfs2_journal_dirty(handle, xb_bh); 4370 4371 out: 4372 up_write(&oi->ip_alloc_sem); 4373 4374 return ret; 4375 } 4376 4377 static int 
cmp_xe_offset(const void *a, const void *b) 4378 { 4379 const struct ocfs2_xattr_entry *l = a, *r = b; 4380 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4381 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4382 4383 if (l_name_offset < r_name_offset) 4384 return 1; 4385 if (l_name_offset > r_name_offset) 4386 return -1; 4387 return 0; 4388 } 4389 4390 /* 4391 * defrag a xattr bucket if we find that the bucket has some 4392 * holes beteen name/value pairs. 4393 * We will move all the name/value pairs to the end of the bucket 4394 * so that we can spare some space for insertion. 4395 */ 4396 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4397 handle_t *handle, 4398 struct ocfs2_xattr_bucket *bucket) 4399 { 4400 int ret, i; 4401 size_t end, offset, len; 4402 struct ocfs2_xattr_header *xh; 4403 char *entries, *buf, *bucket_buf = NULL; 4404 u64 blkno = bucket_blkno(bucket); 4405 u16 xh_free_start; 4406 size_t blocksize = inode->i_sb->s_blocksize; 4407 struct ocfs2_xattr_entry *xe; 4408 4409 /* 4410 * In order to make the operation more efficient and generic, 4411 * we copy all the blocks into a contiguous memory and do the 4412 * defragment there, so if anything is error, we will not touch 4413 * the real block. 
4414 */ 4415 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4416 if (!bucket_buf) { 4417 ret = -EIO; 4418 goto out; 4419 } 4420 4421 buf = bucket_buf; 4422 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4423 memcpy(buf, bucket_block(bucket, i), blocksize); 4424 4425 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4426 OCFS2_JOURNAL_ACCESS_WRITE); 4427 if (ret < 0) { 4428 mlog_errno(ret); 4429 goto out; 4430 } 4431 4432 xh = (struct ocfs2_xattr_header *)bucket_buf; 4433 entries = (char *)xh->xh_entries; 4434 xh_free_start = le16_to_cpu(xh->xh_free_start); 4435 4436 trace_ocfs2_defrag_xattr_bucket( 4437 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4438 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4439 4440 /* 4441 * sort all the entries by their offset. 4442 * the largest will be the first, so that we can 4443 * move them to the end one by one. 4444 */ 4445 sort(entries, le16_to_cpu(xh->xh_count), 4446 sizeof(struct ocfs2_xattr_entry), 4447 cmp_xe_offset, swap_xe); 4448 4449 /* Move all name/values to the end of the bucket. */ 4450 xe = xh->xh_entries; 4451 end = OCFS2_XATTR_BUCKET_SIZE; 4452 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4453 offset = le16_to_cpu(xe->xe_name_offset); 4454 len = namevalue_size_xe(xe); 4455 4456 /* 4457 * We must make sure that the name/value pair 4458 * exist in the same block. So adjust end to 4459 * the previous block end if needed. 
4460 */ 4461 if (((end - len) / blocksize != 4462 (end - 1) / blocksize)) 4463 end = end - end % blocksize; 4464 4465 if (end > offset + len) { 4466 memmove(bucket_buf + end - len, 4467 bucket_buf + offset, len); 4468 xe->xe_name_offset = cpu_to_le16(end - len); 4469 } 4470 4471 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4472 "bucket %llu\n", (unsigned long long)blkno); 4473 4474 end -= len; 4475 } 4476 4477 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4478 "bucket %llu\n", (unsigned long long)blkno); 4479 4480 if (xh_free_start == end) 4481 goto out; 4482 4483 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4484 xh->xh_free_start = cpu_to_le16(end); 4485 4486 /* sort the entries by their name_hash. */ 4487 sort(entries, le16_to_cpu(xh->xh_count), 4488 sizeof(struct ocfs2_xattr_entry), 4489 cmp_xe, swap_xe); 4490 4491 buf = bucket_buf; 4492 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4493 memcpy(bucket_block(bucket, i), buf, blocksize); 4494 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4495 4496 out: 4497 kfree(bucket_buf); 4498 return ret; 4499 } 4500 4501 /* 4502 * prev_blkno points to the start of an existing extent. new_blkno 4503 * points to a newly allocated extent. Because we know each of our 4504 * clusters contains more than bucket, we can easily split one cluster 4505 * at a bucket boundary. So we take the last cluster of the existing 4506 * extent and split it down the middle. We move the last half of the 4507 * buckets in the last cluster of the existing extent over to the new 4508 * extent. 4509 * 4510 * first_bh is the buffer at prev_blkno so we can update the existing 4511 * extent's bucket count. header_bh is the bucket were we were hoping 4512 * to insert our xattr. If the bucket move places the target in the new 4513 * extent, we'll update first_bh and header_bh after modifying the old 4514 * extent. 
4515 * 4516 * first_hash will be set as the 1st xe's name_hash in the new extent. 4517 */ 4518 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 4519 handle_t *handle, 4520 struct ocfs2_xattr_bucket *first, 4521 struct ocfs2_xattr_bucket *target, 4522 u64 new_blkno, 4523 u32 num_clusters, 4524 u32 *first_hash) 4525 { 4526 int ret; 4527 struct super_block *sb = inode->i_sb; 4528 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); 4529 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); 4530 int to_move = num_buckets / 2; 4531 u64 src_blkno; 4532 u64 last_cluster_blkno = bucket_blkno(first) + 4533 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); 4534 4535 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4536 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4537 4538 trace_ocfs2_mv_xattr_bucket_cross_cluster( 4539 (unsigned long long)last_cluster_blkno, 4540 (unsigned long long)new_blkno); 4541 4542 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4543 last_cluster_blkno, new_blkno, 4544 to_move, first_hash); 4545 if (ret) { 4546 mlog_errno(ret); 4547 goto out; 4548 } 4549 4550 /* This is the first bucket that got moved */ 4551 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); 4552 4553 /* 4554 * If the target bucket was part of the moved buckets, we need to 4555 * update first and target. 4556 */ 4557 if (bucket_blkno(target) >= src_blkno) { 4558 /* Find the block for the new target bucket */ 4559 src_blkno = new_blkno + 4560 (bucket_blkno(target) - src_blkno); 4561 4562 ocfs2_xattr_bucket_relse(first); 4563 ocfs2_xattr_bucket_relse(target); 4564 4565 /* 4566 * These shouldn't fail - the buffers are in the 4567 * journal from ocfs2_cp_xattr_bucket(). 
4568 */ 4569 ret = ocfs2_read_xattr_bucket(first, new_blkno); 4570 if (ret) { 4571 mlog_errno(ret); 4572 goto out; 4573 } 4574 ret = ocfs2_read_xattr_bucket(target, src_blkno); 4575 if (ret) 4576 mlog_errno(ret); 4577 4578 } 4579 4580 out: 4581 return ret; 4582 } 4583 4584 /* 4585 * Find the suitable pos when we divide a bucket into 2. 4586 * We have to make sure the xattrs with the same hash value exist 4587 * in the same bucket. 4588 * 4589 * If this ocfs2_xattr_header covers more than one hash value, find a 4590 * place where the hash value changes. Try to find the most even split. 4591 * The most common case is that all entries have different hash values, 4592 * and the first check we make will find a place to split. 4593 */ 4594 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) 4595 { 4596 struct ocfs2_xattr_entry *entries = xh->xh_entries; 4597 int count = le16_to_cpu(xh->xh_count); 4598 int delta, middle = count / 2; 4599 4600 /* 4601 * We start at the middle. Each step gets farther away in both 4602 * directions. We therefore hit the change in hash value 4603 * nearest to the middle. Note that this loop does not execute for 4604 * count < 2. 4605 */ 4606 for (delta = 0; delta < middle; delta++) { 4607 /* Let's check delta earlier than middle */ 4608 if (cmp_xe(&entries[middle - delta - 1], 4609 &entries[middle - delta])) 4610 return middle - delta; 4611 4612 /* For even counts, don't walk off the end */ 4613 if ((middle + delta + 1) == count) 4614 continue; 4615 4616 /* Now try delta past middle */ 4617 if (cmp_xe(&entries[middle + delta], 4618 &entries[middle + delta + 1])) 4619 return middle + delta + 1; 4620 } 4621 4622 /* Every entry had the same hash */ 4623 return count; 4624 } 4625 4626 /* 4627 * Move some xattrs in old bucket(blk) to new bucket(new_blk). 4628 * first_hash will record the 1st hash of the new bucket. 4629 * 4630 * Normally half of the xattrs will be moved. 
But we have to make 4631 * sure that the xattrs with the same hash value are stored in the 4632 * same bucket. If all the xattrs in this bucket have the same hash 4633 * value, the new bucket will be initialized as an empty one and the 4634 * first_hash will be initialized as (hash_value+1). 4635 */ 4636 static int ocfs2_divide_xattr_bucket(struct inode *inode, 4637 handle_t *handle, 4638 u64 blk, 4639 u64 new_blk, 4640 u32 *first_hash, 4641 int new_bucket_head) 4642 { 4643 int ret, i; 4644 int count, start, len, name_value_len = 0, name_offset = 0; 4645 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4646 struct ocfs2_xattr_header *xh; 4647 struct ocfs2_xattr_entry *xe; 4648 int blocksize = inode->i_sb->s_blocksize; 4649 4650 trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk, 4651 (unsigned long long)new_blk); 4652 4653 s_bucket = ocfs2_xattr_bucket_new(inode); 4654 t_bucket = ocfs2_xattr_bucket_new(inode); 4655 if (!s_bucket || !t_bucket) { 4656 ret = -ENOMEM; 4657 mlog_errno(ret); 4658 goto out; 4659 } 4660 4661 ret = ocfs2_read_xattr_bucket(s_bucket, blk); 4662 if (ret) { 4663 mlog_errno(ret); 4664 goto out; 4665 } 4666 4667 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, 4668 OCFS2_JOURNAL_ACCESS_WRITE); 4669 if (ret) { 4670 mlog_errno(ret); 4671 goto out; 4672 } 4673 4674 /* 4675 * Even if !new_bucket_head, we're overwriting t_bucket. Thus, 4676 * there's no need to read it. 4677 */ 4678 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head); 4679 if (ret) { 4680 mlog_errno(ret); 4681 goto out; 4682 } 4683 4684 /* 4685 * Hey, if we're overwriting t_bucket, what difference does 4686 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the 4687 * same part of ocfs2_cp_xattr_bucket(). 4688 */ 4689 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4690 new_bucket_head ? 
4691 OCFS2_JOURNAL_ACCESS_CREATE : 4692 OCFS2_JOURNAL_ACCESS_WRITE); 4693 if (ret) { 4694 mlog_errno(ret); 4695 goto out; 4696 } 4697 4698 xh = bucket_xh(s_bucket); 4699 count = le16_to_cpu(xh->xh_count); 4700 start = ocfs2_xattr_find_divide_pos(xh); 4701 4702 if (start == count) { 4703 xe = &xh->xh_entries[start-1]; 4704 4705 /* 4706 * initialized a new empty bucket here. 4707 * The hash value is set as one larger than 4708 * that of the last entry in the previous bucket. 4709 */ 4710 for (i = 0; i < t_bucket->bu_blocks; i++) 4711 memset(bucket_block(t_bucket, i), 0, blocksize); 4712 4713 xh = bucket_xh(t_bucket); 4714 xh->xh_free_start = cpu_to_le16(blocksize); 4715 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 4716 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 4717 4718 goto set_num_buckets; 4719 } 4720 4721 /* copy the whole bucket to the new first. */ 4722 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4723 4724 /* update the new bucket. */ 4725 xh = bucket_xh(t_bucket); 4726 4727 /* 4728 * Calculate the total name/value len and xh_free_start for 4729 * the old bucket first. 4730 */ 4731 name_offset = OCFS2_XATTR_BUCKET_SIZE; 4732 name_value_len = 0; 4733 for (i = 0; i < start; i++) { 4734 xe = &xh->xh_entries[i]; 4735 name_value_len += namevalue_size_xe(xe); 4736 if (le16_to_cpu(xe->xe_name_offset) < name_offset) 4737 name_offset = le16_to_cpu(xe->xe_name_offset); 4738 } 4739 4740 /* 4741 * Now begin the modification to the new bucket. 4742 * 4743 * In the new bucket, We just move the xattr entry to the beginning 4744 * and don't touch the name/value. So there will be some holes in the 4745 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is 4746 * called. 
4747 */ 4748 xe = &xh->xh_entries[start]; 4749 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4750 trace_ocfs2_divide_xattr_bucket_move(len, 4751 (int)((char *)xe - (char *)xh), 4752 (int)((char *)xh->xh_entries - (char *)xh)); 4753 memmove((char *)xh->xh_entries, (char *)xe, len); 4754 xe = &xh->xh_entries[count - start]; 4755 len = sizeof(struct ocfs2_xattr_entry) * start; 4756 memset((char *)xe, 0, len); 4757 4758 le16_add_cpu(&xh->xh_count, -start); 4759 le16_add_cpu(&xh->xh_name_value_len, -name_value_len); 4760 4761 /* Calculate xh_free_start for the new bucket. */ 4762 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 4763 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 4764 xe = &xh->xh_entries[i]; 4765 if (le16_to_cpu(xe->xe_name_offset) < 4766 le16_to_cpu(xh->xh_free_start)) 4767 xh->xh_free_start = xe->xe_name_offset; 4768 } 4769 4770 set_num_buckets: 4771 /* set xh->xh_num_buckets for the new xh. */ 4772 if (new_bucket_head) 4773 xh->xh_num_buckets = cpu_to_le16(1); 4774 else 4775 xh->xh_num_buckets = 0; 4776 4777 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4778 4779 /* store the first_hash of the new bucket. */ 4780 if (first_hash) 4781 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4782 4783 /* 4784 * Now only update the 1st block of the old bucket. If we 4785 * just added a new empty bucket, there is no need to modify 4786 * it. 4787 */ 4788 if (start == count) 4789 goto out; 4790 4791 xh = bucket_xh(s_bucket); 4792 memset(&xh->xh_entries[start], 0, 4793 sizeof(struct ocfs2_xattr_entry) * (count - start)); 4794 xh->xh_count = cpu_to_le16(start); 4795 xh->xh_free_start = cpu_to_le16(name_offset); 4796 xh->xh_name_value_len = cpu_to_le16(name_value_len); 4797 4798 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); 4799 4800 out: 4801 ocfs2_xattr_bucket_free(s_bucket); 4802 ocfs2_xattr_bucket_free(t_bucket); 4803 4804 return ret; 4805 } 4806 4807 /* 4808 * Copy xattr from one bucket to another bucket. 
4809 * 4810 * The caller must make sure that the journal transaction 4811 * has enough space for journaling. 4812 */ 4813 static int ocfs2_cp_xattr_bucket(struct inode *inode, 4814 handle_t *handle, 4815 u64 s_blkno, 4816 u64 t_blkno, 4817 int t_is_new) 4818 { 4819 int ret; 4820 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4821 4822 BUG_ON(s_blkno == t_blkno); 4823 4824 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno, 4825 (unsigned long long)t_blkno, 4826 t_is_new); 4827 4828 s_bucket = ocfs2_xattr_bucket_new(inode); 4829 t_bucket = ocfs2_xattr_bucket_new(inode); 4830 if (!s_bucket || !t_bucket) { 4831 ret = -ENOMEM; 4832 mlog_errno(ret); 4833 goto out; 4834 } 4835 4836 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); 4837 if (ret) 4838 goto out; 4839 4840 /* 4841 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4842 * there's no need to read it. 4843 */ 4844 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new); 4845 if (ret) 4846 goto out; 4847 4848 /* 4849 * Hey, if we're overwriting t_bucket, what difference does 4850 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new 4851 * cluster to fill, we came here from 4852 * ocfs2_mv_xattr_buckets(), and it is really new - 4853 * ACCESS_CREATE is required. But we also might have moved data 4854 * out of t_bucket before extending back into it. 4855 * ocfs2_add_new_xattr_bucket() can do this - its call to 4856 * ocfs2_add_new_xattr_cluster() may have created a new extent 4857 * and copied out the end of the old extent. Then it re-extends 4858 * the old extent back to create space for new xattrs. That's 4859 * how we get here, and the bucket isn't really new. 4860 */ 4861 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4862 t_is_new ? 
4863 OCFS2_JOURNAL_ACCESS_CREATE : 4864 OCFS2_JOURNAL_ACCESS_WRITE); 4865 if (ret) 4866 goto out; 4867 4868 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4869 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4870 4871 out: 4872 ocfs2_xattr_bucket_free(t_bucket); 4873 ocfs2_xattr_bucket_free(s_bucket); 4874 4875 return ret; 4876 } 4877 4878 /* 4879 * src_blk points to the start of an existing extent. last_blk points to 4880 * last cluster in that extent. to_blk points to a newly allocated 4881 * extent. We copy the buckets from the cluster at last_blk to the new 4882 * extent. If start_bucket is non-zero, we skip that many buckets before 4883 * we start copying. The new extent's xh_num_buckets gets set to the 4884 * number of buckets we copied. The old extent's xh_num_buckets shrinks 4885 * by the same amount. 4886 */ 4887 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 4888 u64 src_blk, u64 last_blk, u64 to_blk, 4889 unsigned int start_bucket, 4890 u32 *first_hash) 4891 { 4892 int i, ret, credits; 4893 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4894 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4895 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4896 struct ocfs2_xattr_bucket *old_first, *new_first; 4897 4898 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk, 4899 (unsigned long long)to_blk); 4900 4901 BUG_ON(start_bucket >= num_buckets); 4902 if (start_bucket) { 4903 num_buckets -= start_bucket; 4904 last_blk += (start_bucket * blks_per_bucket); 4905 } 4906 4907 /* The first bucket of the original extent */ 4908 old_first = ocfs2_xattr_bucket_new(inode); 4909 /* The first bucket of the new extent */ 4910 new_first = ocfs2_xattr_bucket_new(inode); 4911 if (!old_first || !new_first) { 4912 ret = -ENOMEM; 4913 mlog_errno(ret); 4914 goto out; 4915 } 4916 4917 ret = ocfs2_read_xattr_bucket(old_first, src_blk); 4918 if (ret) { 4919 mlog_errno(ret); 4920 goto out; 4921 } 4922 4923 /* 4924 * We 
need to update the first bucket of the old extent and all 4925 * the buckets going to the new extent. 4926 */ 4927 credits = ((num_buckets + 1) * blks_per_bucket); 4928 ret = ocfs2_extend_trans(handle, credits); 4929 if (ret) { 4930 mlog_errno(ret); 4931 goto out; 4932 } 4933 4934 ret = ocfs2_xattr_bucket_journal_access(handle, old_first, 4935 OCFS2_JOURNAL_ACCESS_WRITE); 4936 if (ret) { 4937 mlog_errno(ret); 4938 goto out; 4939 } 4940 4941 for (i = 0; i < num_buckets; i++) { 4942 ret = ocfs2_cp_xattr_bucket(inode, handle, 4943 last_blk + (i * blks_per_bucket), 4944 to_blk + (i * blks_per_bucket), 4945 1); 4946 if (ret) { 4947 mlog_errno(ret); 4948 goto out; 4949 } 4950 } 4951 4952 /* 4953 * Get the new bucket ready before we dirty anything 4954 * (This actually shouldn't fail, because we already dirtied 4955 * it once in ocfs2_cp_xattr_bucket()). 4956 */ 4957 ret = ocfs2_read_xattr_bucket(new_first, to_blk); 4958 if (ret) { 4959 mlog_errno(ret); 4960 goto out; 4961 } 4962 ret = ocfs2_xattr_bucket_journal_access(handle, new_first, 4963 OCFS2_JOURNAL_ACCESS_WRITE); 4964 if (ret) { 4965 mlog_errno(ret); 4966 goto out; 4967 } 4968 4969 /* Now update the headers */ 4970 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); 4971 ocfs2_xattr_bucket_journal_dirty(handle, old_first); 4972 4973 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); 4974 ocfs2_xattr_bucket_journal_dirty(handle, new_first); 4975 4976 if (first_hash) 4977 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); 4978 4979 out: 4980 ocfs2_xattr_bucket_free(new_first); 4981 ocfs2_xattr_bucket_free(old_first); 4982 return ret; 4983 } 4984 4985 /* 4986 * Move some xattrs in this cluster to the new cluster. 4987 * This function should only be called when bucket size == cluster size. 4988 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 
4989 */ 4990 static int ocfs2_divide_xattr_cluster(struct inode *inode, 4991 handle_t *handle, 4992 u64 prev_blk, 4993 u64 new_blk, 4994 u32 *first_hash) 4995 { 4996 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4997 int ret, credits = 2 * blk_per_bucket; 4998 4999 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 5000 5001 ret = ocfs2_extend_trans(handle, credits); 5002 if (ret) { 5003 mlog_errno(ret); 5004 return ret; 5005 } 5006 5007 /* Move half of the xattr in start_blk to the next bucket. */ 5008 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, 5009 new_blk, first_hash, 1); 5010 } 5011 5012 /* 5013 * Move some xattrs from the old cluster to the new one since they are not 5014 * contiguous in ocfs2 xattr tree. 5015 * 5016 * new_blk starts a new separate cluster, and we will move some xattrs from 5017 * prev_blk to it. v_start will be set as the first name hash value in this 5018 * new cluster so that it can be used as e_cpos during tree insertion and 5019 * don't collide with our original b-tree operations. first_bh and header_bh 5020 * will also be updated since they will be used in ocfs2_extend_xattr_bucket 5021 * to extend the insert bucket. 5022 * 5023 * The problem is how much xattr should we move to the new one and when should 5024 * we update first_bh and header_bh? 5025 * 1. If cluster size > bucket size, that means the previous cluster has more 5026 * than 1 bucket, so just move half nums of bucket into the new cluster and 5027 * update the first_bh and header_bh if the insert bucket has been moved 5028 * to the new cluster. 5029 * 2. If cluster_size == bucket_size: 5030 * a) If the previous extent rec has more than one cluster and the insert 5031 * place isn't in the last cluster, copy the entire last cluster to the 5032 * new one. This time, we don't need to upate the first_bh and header_bh 5033 * since they will not be moved into the new cluster. 
5034 * b) Otherwise, move the bottom half of the xattrs in the last cluster into 5035 * the new one. And we set the extend flag to zero if the insert place is 5036 * moved into the new allocated cluster since no extend is needed. 5037 */ 5038 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 5039 handle_t *handle, 5040 struct ocfs2_xattr_bucket *first, 5041 struct ocfs2_xattr_bucket *target, 5042 u64 new_blk, 5043 u32 prev_clusters, 5044 u32 *v_start, 5045 int *extend) 5046 { 5047 int ret; 5048 5049 trace_ocfs2_adjust_xattr_cross_cluster( 5050 (unsigned long long)bucket_blkno(first), 5051 (unsigned long long)new_blk, prev_clusters); 5052 5053 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5054 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5055 handle, 5056 first, target, 5057 new_blk, 5058 prev_clusters, 5059 v_start); 5060 if (ret) 5061 mlog_errno(ret); 5062 } else { 5063 /* The start of the last cluster in the first extent */ 5064 u64 last_blk = bucket_blkno(first) + 5065 ((prev_clusters - 1) * 5066 ocfs2_clusters_to_blocks(inode->i_sb, 1)); 5067 5068 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { 5069 ret = ocfs2_mv_xattr_buckets(inode, handle, 5070 bucket_blkno(first), 5071 last_blk, new_blk, 0, 5072 v_start); 5073 if (ret) 5074 mlog_errno(ret); 5075 } else { 5076 ret = ocfs2_divide_xattr_cluster(inode, handle, 5077 last_blk, new_blk, 5078 v_start); 5079 if (ret) 5080 mlog_errno(ret); 5081 5082 if ((bucket_blkno(target) == last_blk) && extend) 5083 *extend = 0; 5084 } 5085 } 5086 5087 return ret; 5088 } 5089 5090 /* 5091 * Add a new cluster for xattr storage. 5092 * 5093 * If the new cluster is contiguous with the previous one, it will be 5094 * appended to the same extent record, and num_clusters will be updated. 5095 * If not, we will insert a new extent for it and move some xattrs in 5096 * the last cluster into the new allocated one. 
5097 * We also need to limit the maximum size of a btree leaf, otherwise we'll 5098 * lose the benefits of hashing because we'll have to search large leaves. 5099 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, 5100 * if it's bigger). 5101 * 5102 * first_bh is the first block of the previous extent rec and header_bh 5103 * indicates the bucket we will insert the new xattrs. They will be updated 5104 * when the header_bh is moved into the new cluster. 5105 */ 5106 static int ocfs2_add_new_xattr_cluster(struct inode *inode, 5107 struct buffer_head *root_bh, 5108 struct ocfs2_xattr_bucket *first, 5109 struct ocfs2_xattr_bucket *target, 5110 u32 *num_clusters, 5111 u32 prev_cpos, 5112 int *extend, 5113 struct ocfs2_xattr_set_ctxt *ctxt) 5114 { 5115 int ret; 5116 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 5117 u32 prev_clusters = *num_clusters; 5118 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 5119 u64 block; 5120 handle_t *handle = ctxt->handle; 5121 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5122 struct ocfs2_extent_tree et; 5123 5124 trace_ocfs2_add_new_xattr_cluster_begin( 5125 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5126 (unsigned long long)bucket_blkno(first), 5127 prev_cpos, prev_clusters); 5128 5129 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5130 5131 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5132 OCFS2_JOURNAL_ACCESS_WRITE); 5133 if (ret < 0) { 5134 mlog_errno(ret); 5135 goto leave; 5136 } 5137 5138 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1, 5139 clusters_to_add, &bit_off, &num_bits); 5140 if (ret < 0) { 5141 if (ret != -ENOSPC) 5142 mlog_errno(ret); 5143 goto leave; 5144 } 5145 5146 BUG_ON(num_bits > clusters_to_add); 5147 5148 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5149 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits); 5150 5151 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5152 
(prev_clusters + num_bits) << osb->s_clustersize_bits <= 5153 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 5154 /* 5155 * If this cluster is contiguous with the old one and 5156 * adding this new cluster, we don't surpass the limit of 5157 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be 5158 * initialized and used like other buckets in the previous 5159 * cluster. 5160 * So add it as a contiguous one. The caller will handle 5161 * its init process. 5162 */ 5163 v_start = prev_cpos + prev_clusters; 5164 *num_clusters = prev_clusters + num_bits; 5165 } else { 5166 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5167 handle, 5168 first, 5169 target, 5170 block, 5171 prev_clusters, 5172 &v_start, 5173 extend); 5174 if (ret) { 5175 mlog_errno(ret); 5176 goto leave; 5177 } 5178 } 5179 5180 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block, 5181 v_start, num_bits); 5182 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5183 num_bits, 0, ctxt->meta_ac); 5184 if (ret < 0) { 5185 mlog_errno(ret); 5186 goto leave; 5187 } 5188 5189 ocfs2_journal_dirty(handle, root_bh); 5190 5191 leave: 5192 return ret; 5193 } 5194 5195 /* 5196 * We are given an extent. 'first' is the bucket at the very front of 5197 * the extent. The extent has space for an additional bucket past 5198 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number 5199 * of the target bucket. We wish to shift every bucket past the target 5200 * down one, filling in that additional space. When we get back to the 5201 * target, we split the target between itself and the now-empty bucket 5202 * at target+1 (aka, target_blkno + blks_per_bucket). 
5203 */ 5204 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5205 handle_t *handle, 5206 struct ocfs2_xattr_bucket *first, 5207 u64 target_blk, 5208 u32 num_clusters) 5209 { 5210 int ret, credits; 5211 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5212 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5213 u64 end_blk; 5214 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5215 5216 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk, 5217 (unsigned long long)bucket_blkno(first), 5218 num_clusters, new_bucket); 5219 5220 /* The extent must have room for an additional bucket */ 5221 BUG_ON(new_bucket >= 5222 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5223 5224 /* end_blk points to the last existing bucket */ 5225 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5226 5227 /* 5228 * end_blk is the start of the last existing bucket. 5229 * Thus, (end_blk - target_blk) covers the target bucket and 5230 * every bucket after it up to, but not including, the last 5231 * existing bucket. Then we add the last existing bucket, the 5232 * new bucket, and the first bucket (3 * blk_per_bucket). 5233 */ 5234 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5235 ret = ocfs2_extend_trans(handle, credits); 5236 if (ret) { 5237 mlog_errno(ret); 5238 goto out; 5239 } 5240 5241 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5242 OCFS2_JOURNAL_ACCESS_WRITE); 5243 if (ret) { 5244 mlog_errno(ret); 5245 goto out; 5246 } 5247 5248 while (end_blk != target_blk) { 5249 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5250 end_blk + blk_per_bucket, 0); 5251 if (ret) 5252 goto out; 5253 end_blk -= blk_per_bucket; 5254 } 5255 5256 /* Move half of the xattr in target_blkno to the next bucket. 
*/ 5257 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5258 target_blk + blk_per_bucket, NULL, 0); 5259 5260 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5261 ocfs2_xattr_bucket_journal_dirty(handle, first); 5262 5263 out: 5264 return ret; 5265 } 5266 5267 /* 5268 * Add new xattr bucket in an extent record and adjust the buckets 5269 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5270 * bucket we want to insert into. 5271 * 5272 * In the easy case, we will move all the buckets after target down by 5273 * one. Half of target's xattrs will be moved to the next bucket. 5274 * 5275 * If current cluster is full, we'll allocate a new one. This may not 5276 * be contiguous. The underlying calls will make sure that there is 5277 * space for the insert, shifting buckets around if necessary. 5278 * 'target' may be moved by those calls. 5279 */ 5280 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5281 struct buffer_head *xb_bh, 5282 struct ocfs2_xattr_bucket *target, 5283 struct ocfs2_xattr_set_ctxt *ctxt) 5284 { 5285 struct ocfs2_xattr_block *xb = 5286 (struct ocfs2_xattr_block *)xb_bh->b_data; 5287 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5288 struct ocfs2_extent_list *el = &xb_root->xt_list; 5289 u32 name_hash = 5290 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5291 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5292 int ret, num_buckets, extend = 1; 5293 u64 p_blkno; 5294 u32 e_cpos, num_clusters; 5295 /* The bucket at the front of the extent */ 5296 struct ocfs2_xattr_bucket *first; 5297 5298 trace_ocfs2_add_new_xattr_bucket( 5299 (unsigned long long)bucket_blkno(target)); 5300 5301 /* The first bucket of the original extent */ 5302 first = ocfs2_xattr_bucket_new(inode); 5303 if (!first) { 5304 ret = -ENOMEM; 5305 mlog_errno(ret); 5306 goto out; 5307 } 5308 5309 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5310 &num_clusters, el); 5311 if (ret) { 5312 mlog_errno(ret); 
5313 goto out; 5314 } 5315 5316 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5317 if (ret) { 5318 mlog_errno(ret); 5319 goto out; 5320 } 5321 5322 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5323 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5324 /* 5325 * This can move first+target if the target bucket moves 5326 * to the new extent. 5327 */ 5328 ret = ocfs2_add_new_xattr_cluster(inode, 5329 xb_bh, 5330 first, 5331 target, 5332 &num_clusters, 5333 e_cpos, 5334 &extend, 5335 ctxt); 5336 if (ret) { 5337 mlog_errno(ret); 5338 goto out; 5339 } 5340 } 5341 5342 if (extend) { 5343 ret = ocfs2_extend_xattr_bucket(inode, 5344 ctxt->handle, 5345 first, 5346 bucket_blkno(target), 5347 num_clusters); 5348 if (ret) 5349 mlog_errno(ret); 5350 } 5351 5352 out: 5353 ocfs2_xattr_bucket_free(first); 5354 5355 return ret; 5356 } 5357 5358 /* 5359 * Truncate the specified xe_off entry in xattr bucket. 5360 * bucket is indicated by header_bh and len is the new length. 5361 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5362 * 5363 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 5364 */ 5365 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5366 struct ocfs2_xattr_bucket *bucket, 5367 int xe_off, 5368 int len, 5369 struct ocfs2_xattr_set_ctxt *ctxt) 5370 { 5371 int ret, offset; 5372 u64 value_blk; 5373 struct ocfs2_xattr_entry *xe; 5374 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5375 size_t blocksize = inode->i_sb->s_blocksize; 5376 struct ocfs2_xattr_value_buf vb = { 5377 .vb_access = ocfs2_journal_access, 5378 }; 5379 5380 xe = &xh->xh_entries[xe_off]; 5381 5382 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5383 5384 offset = le16_to_cpu(xe->xe_name_offset) + 5385 OCFS2_XATTR_SIZE(xe->xe_name_len); 5386 5387 value_blk = offset / blocksize; 5388 5389 /* We don't allow ocfs2_xattr_value to be stored in different block. 
*/ 5390 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5391 5392 vb.vb_bh = bucket->bu_bhs[value_blk]; 5393 BUG_ON(!vb.vb_bh); 5394 5395 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5396 (vb.vb_bh->b_data + offset % blocksize); 5397 5398 /* 5399 * From here on out we have to dirty the bucket. The generic 5400 * value calls only modify one of the bucket's bhs, but we need 5401 * to send the bucket at once. So if they error, they *could* have 5402 * modified something. We have to assume they did, and dirty 5403 * the whole bucket. This leaves us in a consistent state. 5404 */ 5405 trace_ocfs2_xattr_bucket_value_truncate( 5406 (unsigned long long)bucket_blkno(bucket), xe_off, len); 5407 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5408 if (ret) { 5409 mlog_errno(ret); 5410 goto out; 5411 } 5412 5413 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5414 OCFS2_JOURNAL_ACCESS_WRITE); 5415 if (ret) { 5416 mlog_errno(ret); 5417 goto out; 5418 } 5419 5420 xe->xe_value_size = cpu_to_le64(len); 5421 5422 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5423 5424 out: 5425 return ret; 5426 } 5427 5428 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5429 struct buffer_head *root_bh, 5430 u64 blkno, 5431 u32 cpos, 5432 u32 len, 5433 void *para) 5434 { 5435 int ret; 5436 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5437 struct inode *tl_inode = osb->osb_tl_inode; 5438 handle_t *handle; 5439 struct ocfs2_xattr_block *xb = 5440 (struct ocfs2_xattr_block *)root_bh->b_data; 5441 struct ocfs2_alloc_context *meta_ac = NULL; 5442 struct ocfs2_cached_dealloc_ctxt dealloc; 5443 struct ocfs2_extent_tree et; 5444 5445 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5446 ocfs2_delete_xattr_in_bucket, para); 5447 if (ret) { 5448 mlog_errno(ret); 5449 return ret; 5450 } 5451 5452 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5453 5454 ocfs2_init_dealloc_ctxt(&dealloc); 5455 5456 
trace_ocfs2_rm_xattr_cluster( 5457 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5458 (unsigned long long)blkno, cpos, len); 5459 5460 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5461 len); 5462 5463 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5464 if (ret) { 5465 mlog_errno(ret); 5466 return ret; 5467 } 5468 5469 inode_lock(tl_inode); 5470 5471 if (ocfs2_truncate_log_needs_flush(osb)) { 5472 ret = __ocfs2_flush_truncate_log(osb); 5473 if (ret < 0) { 5474 mlog_errno(ret); 5475 goto out; 5476 } 5477 } 5478 5479 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5480 if (IS_ERR(handle)) { 5481 ret = -ENOMEM; 5482 mlog_errno(ret); 5483 goto out; 5484 } 5485 5486 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5487 OCFS2_JOURNAL_ACCESS_WRITE); 5488 if (ret) { 5489 mlog_errno(ret); 5490 goto out_commit; 5491 } 5492 5493 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5494 &dealloc); 5495 if (ret) { 5496 mlog_errno(ret); 5497 goto out_commit; 5498 } 5499 5500 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5501 ocfs2_journal_dirty(handle, root_bh); 5502 5503 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5504 if (ret) 5505 mlog_errno(ret); 5506 ocfs2_update_inode_fsync_trans(handle, inode, 0); 5507 5508 out_commit: 5509 ocfs2_commit_trans(osb, handle); 5510 out: 5511 ocfs2_schedule_truncate_log_flush(osb, 1); 5512 5513 inode_unlock(tl_inode); 5514 5515 if (meta_ac) 5516 ocfs2_free_alloc_context(meta_ac); 5517 5518 ocfs2_run_deallocs(osb, &dealloc); 5519 5520 return ret; 5521 } 5522 5523 /* 5524 * check whether the xattr bucket is filled up with the same hash value. 5525 * If we want to insert the xattr with the same hash, return -ENOSPC. 5526 * If we want to insert a xattr with different hash value, go ahead 5527 * and ocfs2_divide_xattr_bucket will handle this. 
5528 */ 5529 static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 5530 struct ocfs2_xattr_bucket *bucket, 5531 const char *name) 5532 { 5533 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5534 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5535 5536 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5537 return 0; 5538 5539 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 5540 xh->xh_entries[0].xe_name_hash) { 5541 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5542 "hash = %u\n", 5543 (unsigned long long)bucket_blkno(bucket), 5544 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5545 return -ENOSPC; 5546 } 5547 5548 return 0; 5549 } 5550 5551 /* 5552 * Try to set the entry in the current bucket. If we fail, the caller 5553 * will handle getting us another bucket. 5554 */ 5555 static int ocfs2_xattr_set_entry_bucket(struct inode *inode, 5556 struct ocfs2_xattr_info *xi, 5557 struct ocfs2_xattr_search *xs, 5558 struct ocfs2_xattr_set_ctxt *ctxt) 5559 { 5560 int ret; 5561 struct ocfs2_xa_loc loc; 5562 5563 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name); 5564 5565 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5566 xs->not_found ? NULL : xs->here); 5567 ret = ocfs2_xa_set(&loc, xi, ctxt); 5568 if (!ret) { 5569 xs->here = loc.xl_entry; 5570 goto out; 5571 } 5572 if (ret != -ENOSPC) { 5573 mlog_errno(ret); 5574 goto out; 5575 } 5576 5577 /* Ok, we need space. Let's try defragmenting the bucket. 
*/ 5578 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5579 xs->bucket); 5580 if (ret) { 5581 mlog_errno(ret); 5582 goto out; 5583 } 5584 5585 ret = ocfs2_xa_set(&loc, xi, ctxt); 5586 if (!ret) { 5587 xs->here = loc.xl_entry; 5588 goto out; 5589 } 5590 if (ret != -ENOSPC) 5591 mlog_errno(ret); 5592 5593 5594 out: 5595 return ret; 5596 } 5597 5598 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5599 struct ocfs2_xattr_info *xi, 5600 struct ocfs2_xattr_search *xs, 5601 struct ocfs2_xattr_set_ctxt *ctxt) 5602 { 5603 int ret; 5604 5605 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name); 5606 5607 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5608 if (!ret) 5609 goto out; 5610 if (ret != -ENOSPC) { 5611 mlog_errno(ret); 5612 goto out; 5613 } 5614 5615 /* Ack, need more space. Let's try to get another bucket! */ 5616 5617 /* 5618 * We do not allow for overlapping ranges between buckets. And 5619 * the maximum number of collisions we will allow for then is 5620 * one bucket's worth, so check it here whether we need to 5621 * add a new bucket for the insert. 5622 */ 5623 ret = ocfs2_check_xattr_bucket_collision(inode, 5624 xs->bucket, 5625 xi->xi_name); 5626 if (ret) { 5627 mlog_errno(ret); 5628 goto out; 5629 } 5630 5631 ret = ocfs2_add_new_xattr_bucket(inode, 5632 xs->xattr_bh, 5633 xs->bucket, 5634 ctxt); 5635 if (ret) { 5636 mlog_errno(ret); 5637 goto out; 5638 } 5639 5640 /* 5641 * ocfs2_add_new_xattr_bucket() will have updated 5642 * xs->bucket if it moved, but it will not have updated 5643 * any of the other search fields. Thus, we drop it and 5644 * re-search. Everything should be cached, so it'll be 5645 * quick. 
5646 */ 5647 ocfs2_xattr_bucket_relse(xs->bucket); 5648 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5649 xi->xi_name_index, 5650 xi->xi_name, xs); 5651 if (ret && ret != -ENODATA) 5652 goto out; 5653 xs->not_found = ret; 5654 5655 /* Ok, we have a new bucket, let's try again */ 5656 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5657 if (ret && (ret != -ENOSPC)) 5658 mlog_errno(ret); 5659 5660 out: 5661 return ret; 5662 } 5663 5664 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5665 struct ocfs2_xattr_bucket *bucket, 5666 void *para) 5667 { 5668 int ret = 0, ref_credits; 5669 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5670 u16 i; 5671 struct ocfs2_xattr_entry *xe; 5672 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5673 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5674 int credits = ocfs2_remove_extent_credits(osb->sb) + 5675 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5676 struct ocfs2_xattr_value_root *xv; 5677 struct ocfs2_rm_xattr_bucket_para *args = 5678 (struct ocfs2_rm_xattr_bucket_para *)para; 5679 5680 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5681 5682 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5683 xe = &xh->xh_entries[i]; 5684 if (ocfs2_xattr_is_local(xe)) 5685 continue; 5686 5687 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5688 i, &xv, NULL); 5689 if (ret) { 5690 mlog_errno(ret); 5691 break; 5692 } 5693 5694 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5695 args->ref_ci, 5696 args->ref_root_bh, 5697 &ctxt.meta_ac, 5698 &ref_credits); 5699 5700 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5701 if (IS_ERR(ctxt.handle)) { 5702 ret = PTR_ERR(ctxt.handle); 5703 mlog_errno(ret); 5704 break; 5705 } 5706 5707 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5708 i, 0, &ctxt); 5709 5710 ocfs2_commit_trans(osb, ctxt.handle); 5711 if (ctxt.meta_ac) { 5712 ocfs2_free_alloc_context(ctxt.meta_ac); 5713 ctxt.meta_ac = NULL; 5714 } 5715 if (ret) { 5716 mlog_errno(ret); 
5717 break; 5718 } 5719 } 5720 5721 if (ctxt.meta_ac) 5722 ocfs2_free_alloc_context(ctxt.meta_ac); 5723 ocfs2_schedule_truncate_log_flush(osb, 1); 5724 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5725 return ret; 5726 } 5727 5728 /* 5729 * Whenever we modify a xattr value root in the bucket(e.g, CoW 5730 * or change the extent record flag), we need to recalculate 5731 * the metaecc for the whole bucket. So it is done here. 5732 * 5733 * Note: 5734 * We have to give the extra credits for the caller. 5735 */ 5736 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, 5737 handle_t *handle, 5738 void *para) 5739 { 5740 int ret; 5741 struct ocfs2_xattr_bucket *bucket = 5742 (struct ocfs2_xattr_bucket *)para; 5743 5744 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 5745 OCFS2_JOURNAL_ACCESS_WRITE); 5746 if (ret) { 5747 mlog_errno(ret); 5748 return ret; 5749 } 5750 5751 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 5752 5753 return 0; 5754 } 5755 5756 /* 5757 * Special action we need if the xattr value is refcounted. 5758 * 5759 * 1. If the xattr is refcounted, lock the tree. 5760 * 2. CoW the xattr if we are setting the new value and the value 5761 * will be stored outside. 5762 * 3. In other case, decrease_refcount will work for us, so just 5763 * lock the refcount tree, calculate the meta and credits is OK. 5764 * 5765 * We have to do CoW before ocfs2_init_xattr_set_ctxt since 5766 * currently CoW is a completed transaction, while this function 5767 * will also lock the allocators and let us deadlock. So we will 5768 * CoW the whole xattr value. 
5769 */ 5770 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5771 struct ocfs2_dinode *di, 5772 struct ocfs2_xattr_info *xi, 5773 struct ocfs2_xattr_search *xis, 5774 struct ocfs2_xattr_search *xbs, 5775 struct ocfs2_refcount_tree **ref_tree, 5776 int *meta_add, 5777 int *credits) 5778 { 5779 int ret = 0; 5780 struct ocfs2_xattr_block *xb; 5781 struct ocfs2_xattr_entry *xe; 5782 char *base; 5783 u32 p_cluster, num_clusters; 5784 unsigned int ext_flags; 5785 int name_offset, name_len; 5786 struct ocfs2_xattr_value_buf vb; 5787 struct ocfs2_xattr_bucket *bucket = NULL; 5788 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5789 struct ocfs2_post_refcount refcount; 5790 struct ocfs2_post_refcount *p = NULL; 5791 struct buffer_head *ref_root_bh = NULL; 5792 5793 if (!xis->not_found) { 5794 xe = xis->here; 5795 name_offset = le16_to_cpu(xe->xe_name_offset); 5796 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5797 base = xis->base; 5798 vb.vb_bh = xis->inode_bh; 5799 vb.vb_access = ocfs2_journal_access_di; 5800 } else { 5801 int i, block_off = 0; 5802 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5803 xe = xbs->here; 5804 name_offset = le16_to_cpu(xe->xe_name_offset); 5805 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5806 i = xbs->here - xbs->header->xh_entries; 5807 5808 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5809 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5810 bucket_xh(xbs->bucket), 5811 i, &block_off, 5812 &name_offset); 5813 if (ret) { 5814 mlog_errno(ret); 5815 goto out; 5816 } 5817 base = bucket_block(xbs->bucket, block_off); 5818 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5819 vb.vb_access = ocfs2_journal_access; 5820 5821 if (ocfs2_meta_ecc(osb)) { 5822 /*create parameters for ocfs2_post_refcount. 
*/ 5823 bucket = xbs->bucket; 5824 refcount.credits = bucket->bu_blocks; 5825 refcount.para = bucket; 5826 refcount.func = 5827 ocfs2_xattr_bucket_post_refcount; 5828 p = &refcount; 5829 } 5830 } else { 5831 base = xbs->base; 5832 vb.vb_bh = xbs->xattr_bh; 5833 vb.vb_access = ocfs2_journal_access_xb; 5834 } 5835 } 5836 5837 if (ocfs2_xattr_is_local(xe)) 5838 goto out; 5839 5840 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5841 (base + name_offset + name_len); 5842 5843 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5844 &num_clusters, &vb.vb_xv->xr_list, 5845 &ext_flags); 5846 if (ret) { 5847 mlog_errno(ret); 5848 goto out; 5849 } 5850 5851 /* 5852 * We just need to check the 1st extent record, since we always 5853 * CoW the whole xattr. So there shouldn't be a xattr with 5854 * some REFCOUNT extent recs after the 1st one. 5855 */ 5856 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5857 goto out; 5858 5859 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5860 1, ref_tree, &ref_root_bh); 5861 if (ret) { 5862 mlog_errno(ret); 5863 goto out; 5864 } 5865 5866 /* 5867 * If we are deleting the xattr or the new size will be stored inside, 5868 * cool, leave it there, the xattr truncate process will remove them 5869 * for us(it still needs the refcount tree lock and the meta, credits). 5870 * And the worse case is that every cluster truncate will split the 5871 * refcount tree, and make the original extent become 3. So we will need 5872 * 2 * cluster more extent recs at most. 
5873 */ 5874 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5875 5876 ret = ocfs2_refcounted_xattr_delete_need(inode, 5877 &(*ref_tree)->rf_ci, 5878 ref_root_bh, vb.vb_xv, 5879 meta_add, credits); 5880 if (ret) 5881 mlog_errno(ret); 5882 goto out; 5883 } 5884 5885 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5886 *ref_tree, ref_root_bh, 0, 5887 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5888 if (ret) 5889 mlog_errno(ret); 5890 5891 out: 5892 brelse(ref_root_bh); 5893 return ret; 5894 } 5895 5896 /* 5897 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5898 * The physical clusters will be added to refcount tree. 5899 */ 5900 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5901 struct ocfs2_xattr_value_root *xv, 5902 struct ocfs2_extent_tree *value_et, 5903 struct ocfs2_caching_info *ref_ci, 5904 struct buffer_head *ref_root_bh, 5905 struct ocfs2_cached_dealloc_ctxt *dealloc, 5906 struct ocfs2_post_refcount *refcount) 5907 { 5908 int ret = 0; 5909 u32 clusters = le32_to_cpu(xv->xr_clusters); 5910 u32 cpos, p_cluster, num_clusters; 5911 struct ocfs2_extent_list *el = &xv->xr_list; 5912 unsigned int ext_flags; 5913 5914 cpos = 0; 5915 while (cpos < clusters) { 5916 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5917 &num_clusters, el, &ext_flags); 5918 if (ret) { 5919 mlog_errno(ret); 5920 break; 5921 } 5922 5923 cpos += num_clusters; 5924 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5925 continue; 5926 5927 BUG_ON(!p_cluster); 5928 5929 ret = ocfs2_add_refcount_flag(inode, value_et, 5930 ref_ci, ref_root_bh, 5931 cpos - num_clusters, 5932 p_cluster, num_clusters, 5933 dealloc, refcount); 5934 if (ret) { 5935 mlog_errno(ret); 5936 break; 5937 } 5938 } 5939 5940 return ret; 5941 } 5942 5943 /* 5944 * Given a normal ocfs2_xattr_header, refcount all the entries which 5945 * have value stored outside. 5946 * Used for xattrs stored in inode and ocfs2_xattr_block. 
5947 */ 5948 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5949 struct ocfs2_xattr_value_buf *vb, 5950 struct ocfs2_xattr_header *header, 5951 struct ocfs2_caching_info *ref_ci, 5952 struct buffer_head *ref_root_bh, 5953 struct ocfs2_cached_dealloc_ctxt *dealloc) 5954 { 5955 5956 struct ocfs2_xattr_entry *xe; 5957 struct ocfs2_xattr_value_root *xv; 5958 struct ocfs2_extent_tree et; 5959 int i, ret = 0; 5960 5961 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5962 xe = &header->xh_entries[i]; 5963 5964 if (ocfs2_xattr_is_local(xe)) 5965 continue; 5966 5967 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5968 le16_to_cpu(xe->xe_name_offset) + 5969 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5970 5971 vb->vb_xv = xv; 5972 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5973 5974 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 5975 ref_ci, ref_root_bh, 5976 dealloc, NULL); 5977 if (ret) { 5978 mlog_errno(ret); 5979 break; 5980 } 5981 } 5982 5983 return ret; 5984 } 5985 5986 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 5987 struct buffer_head *fe_bh, 5988 struct ocfs2_caching_info *ref_ci, 5989 struct buffer_head *ref_root_bh, 5990 struct ocfs2_cached_dealloc_ctxt *dealloc) 5991 { 5992 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 5993 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) 5994 (fe_bh->b_data + inode->i_sb->s_blocksize - 5995 le16_to_cpu(di->i_xattr_inline_size)); 5996 struct ocfs2_xattr_value_buf vb = { 5997 .vb_bh = fe_bh, 5998 .vb_access = ocfs2_journal_access_di, 5999 }; 6000 6001 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6002 ref_ci, ref_root_bh, dealloc); 6003 } 6004 6005 struct ocfs2_xattr_tree_value_refcount_para { 6006 struct ocfs2_caching_info *ref_ci; 6007 struct buffer_head *ref_root_bh; 6008 struct ocfs2_cached_dealloc_ctxt *dealloc; 6009 }; 6010 6011 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 
6012 struct ocfs2_xattr_bucket *bucket, 6013 int offset, 6014 struct ocfs2_xattr_value_root **xv, 6015 struct buffer_head **bh) 6016 { 6017 int ret, block_off, name_offset; 6018 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 6019 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6020 void *base; 6021 6022 ret = ocfs2_xattr_bucket_get_name_value(sb, 6023 bucket_xh(bucket), 6024 offset, 6025 &block_off, 6026 &name_offset); 6027 if (ret) { 6028 mlog_errno(ret); 6029 goto out; 6030 } 6031 6032 base = bucket_block(bucket, block_off); 6033 6034 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6035 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6036 6037 if (bh) 6038 *bh = bucket->bu_bhs[block_off]; 6039 out: 6040 return ret; 6041 } 6042 6043 /* 6044 * For a given xattr bucket, refcount all the entries which 6045 * have value stored outside. 6046 */ 6047 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6048 struct ocfs2_xattr_bucket *bucket, 6049 void *para) 6050 { 6051 int i, ret = 0; 6052 struct ocfs2_extent_tree et; 6053 struct ocfs2_xattr_tree_value_refcount_para *ref = 6054 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6055 struct ocfs2_xattr_header *xh = 6056 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6057 struct ocfs2_xattr_entry *xe; 6058 struct ocfs2_xattr_value_buf vb = { 6059 .vb_access = ocfs2_journal_access, 6060 }; 6061 struct ocfs2_post_refcount refcount = { 6062 .credits = bucket->bu_blocks, 6063 .para = bucket, 6064 .func = ocfs2_xattr_bucket_post_refcount, 6065 }; 6066 struct ocfs2_post_refcount *p = NULL; 6067 6068 /* We only need post_refcount if we support metaecc. 
*/ 6069 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6070 p = &refcount; 6071 6072 trace_ocfs2_xattr_bucket_value_refcount( 6073 (unsigned long long)bucket_blkno(bucket), 6074 le16_to_cpu(xh->xh_count)); 6075 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6076 xe = &xh->xh_entries[i]; 6077 6078 if (ocfs2_xattr_is_local(xe)) 6079 continue; 6080 6081 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6082 &vb.vb_xv, &vb.vb_bh); 6083 if (ret) { 6084 mlog_errno(ret); 6085 break; 6086 } 6087 6088 ocfs2_init_xattr_value_extent_tree(&et, 6089 INODE_CACHE(inode), &vb); 6090 6091 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6092 &et, ref->ref_ci, 6093 ref->ref_root_bh, 6094 ref->dealloc, p); 6095 if (ret) { 6096 mlog_errno(ret); 6097 break; 6098 } 6099 } 6100 6101 return ret; 6102 6103 } 6104 6105 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6106 struct buffer_head *root_bh, 6107 u64 blkno, u32 cpos, u32 len, void *para) 6108 { 6109 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6110 ocfs2_xattr_bucket_value_refcount, 6111 para); 6112 } 6113 6114 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6115 struct buffer_head *blk_bh, 6116 struct ocfs2_caching_info *ref_ci, 6117 struct buffer_head *ref_root_bh, 6118 struct ocfs2_cached_dealloc_ctxt *dealloc) 6119 { 6120 int ret = 0; 6121 struct ocfs2_xattr_block *xb = 6122 (struct ocfs2_xattr_block *)blk_bh->b_data; 6123 6124 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6125 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6126 struct ocfs2_xattr_value_buf vb = { 6127 .vb_bh = blk_bh, 6128 .vb_access = ocfs2_journal_access_xb, 6129 }; 6130 6131 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6132 ref_ci, ref_root_bh, 6133 dealloc); 6134 } else { 6135 struct ocfs2_xattr_tree_value_refcount_para para = { 6136 .ref_ci = ref_ci, 6137 .ref_root_bh = ref_root_bh, 6138 .dealloc = dealloc, 6139 }; 6140 6141 ret = 
ocfs2_iterate_xattr_index_block(inode, blk_bh, 6142 ocfs2_refcount_xattr_tree_rec, 6143 ¶); 6144 } 6145 6146 return ret; 6147 } 6148 6149 int ocfs2_xattr_attach_refcount_tree(struct inode *inode, 6150 struct buffer_head *fe_bh, 6151 struct ocfs2_caching_info *ref_ci, 6152 struct buffer_head *ref_root_bh, 6153 struct ocfs2_cached_dealloc_ctxt *dealloc) 6154 { 6155 int ret = 0; 6156 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6157 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6158 struct buffer_head *blk_bh = NULL; 6159 6160 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 6161 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, 6162 ref_ci, ref_root_bh, 6163 dealloc); 6164 if (ret) { 6165 mlog_errno(ret); 6166 goto out; 6167 } 6168 } 6169 6170 if (!di->i_xattr_loc) 6171 goto out; 6172 6173 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 6174 &blk_bh); 6175 if (ret < 0) { 6176 mlog_errno(ret); 6177 goto out; 6178 } 6179 6180 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, 6181 ref_root_bh, dealloc); 6182 if (ret) 6183 mlog_errno(ret); 6184 6185 brelse(blk_bh); 6186 out: 6187 6188 return ret; 6189 } 6190 6191 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); 6192 /* 6193 * Store the information we need in xattr reflink. 6194 * old_bh and new_bh are inode bh for the old and new inode. 6195 */ 6196 struct ocfs2_xattr_reflink { 6197 struct inode *old_inode; 6198 struct inode *new_inode; 6199 struct buffer_head *old_bh; 6200 struct buffer_head *new_bh; 6201 struct ocfs2_caching_info *ref_ci; 6202 struct buffer_head *ref_root_bh; 6203 struct ocfs2_cached_dealloc_ctxt *dealloc; 6204 should_xattr_reflinked *xattr_reflinked; 6205 }; 6206 6207 /* 6208 * Given a xattr header and xe offset, 6209 * return the proper xv and the corresponding bh. 6210 * xattr in inode, block and xattr tree have different implementaions. 
6211 */ 6212 typedef int (get_xattr_value_root)(struct super_block *sb, 6213 struct buffer_head *bh, 6214 struct ocfs2_xattr_header *xh, 6215 int offset, 6216 struct ocfs2_xattr_value_root **xv, 6217 struct buffer_head **ret_bh, 6218 void *para); 6219 6220 /* 6221 * Calculate all the xattr value root metadata stored in this xattr header and 6222 * credits we need if we create them from the scratch. 6223 * We use get_xattr_value_root so that all types of xattr container can use it. 6224 */ 6225 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, 6226 struct buffer_head *bh, 6227 struct ocfs2_xattr_header *xh, 6228 int *metas, int *credits, 6229 int *num_recs, 6230 get_xattr_value_root *func, 6231 void *para) 6232 { 6233 int i, ret = 0; 6234 struct ocfs2_xattr_value_root *xv; 6235 struct ocfs2_xattr_entry *xe; 6236 6237 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6238 xe = &xh->xh_entries[i]; 6239 if (ocfs2_xattr_is_local(xe)) 6240 continue; 6241 6242 ret = func(sb, bh, xh, i, &xv, NULL, para); 6243 if (ret) { 6244 mlog_errno(ret); 6245 break; 6246 } 6247 6248 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * 6249 le16_to_cpu(xv->xr_list.l_next_free_rec); 6250 6251 *credits += ocfs2_calc_extend_credits(sb, 6252 &def_xv.xv.xr_list); 6253 6254 /* 6255 * If the value is a tree with depth > 1, We don't go deep 6256 * to the extent block, so just calculate a maximum record num. 6257 */ 6258 if (!xv->xr_list.l_tree_depth) 6259 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); 6260 else 6261 *num_recs += ocfs2_clusters_for_bytes(sb, 6262 XATTR_SIZE_MAX); 6263 } 6264 6265 return ret; 6266 } 6267 6268 /* Used by xattr inode and block to return the right xv and buffer_head. 
*/ 6269 static int ocfs2_get_xattr_value_root(struct super_block *sb, 6270 struct buffer_head *bh, 6271 struct ocfs2_xattr_header *xh, 6272 int offset, 6273 struct ocfs2_xattr_value_root **xv, 6274 struct buffer_head **ret_bh, 6275 void *para) 6276 { 6277 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6278 6279 *xv = (struct ocfs2_xattr_value_root *)((void *)xh + 6280 le16_to_cpu(xe->xe_name_offset) + 6281 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6282 6283 if (ret_bh) 6284 *ret_bh = bh; 6285 6286 return 0; 6287 } 6288 6289 /* 6290 * Lock the meta_ac and caculate how much credits we need for reflink xattrs. 6291 * It is only used for inline xattr and xattr block. 6292 */ 6293 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, 6294 struct ocfs2_xattr_header *xh, 6295 struct buffer_head *ref_root_bh, 6296 int *credits, 6297 struct ocfs2_alloc_context **meta_ac) 6298 { 6299 int ret, meta_add = 0, num_recs = 0; 6300 struct ocfs2_refcount_block *rb = 6301 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 6302 6303 *credits = 0; 6304 6305 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh, 6306 &meta_add, credits, &num_recs, 6307 ocfs2_get_xattr_value_root, 6308 NULL); 6309 if (ret) { 6310 mlog_errno(ret); 6311 goto out; 6312 } 6313 6314 /* 6315 * We need to add/modify num_recs in refcount tree, so just calculate 6316 * an approximate number we need for refcount tree change. 6317 * Sometimes we need to split the tree, and after split, half recs 6318 * will be moved to the new block, and a new block can only provide 6319 * half number of recs. So we multiple new blocks by 2. 
6320 */ 6321 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6322 meta_add += num_recs; 6323 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6324 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6325 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6326 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6327 else 6328 *credits += 1; 6329 6330 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); 6331 if (ret) 6332 mlog_errno(ret); 6333 6334 out: 6335 return ret; 6336 } 6337 6338 /* 6339 * Given a xattr header, reflink all the xattrs in this container. 6340 * It can be used for inode, block and bucket. 6341 * 6342 * NOTE: 6343 * Before we call this function, the caller has memcpy the xattr in 6344 * old_xh to the new_xh. 6345 * 6346 * If args.xattr_reflinked is set, call it to decide whether the xe should 6347 * be reflinked or not. If not, remove it from the new xattr header. 6348 */ 6349 static int ocfs2_reflink_xattr_header(handle_t *handle, 6350 struct ocfs2_xattr_reflink *args, 6351 struct buffer_head *old_bh, 6352 struct ocfs2_xattr_header *xh, 6353 struct buffer_head *new_bh, 6354 struct ocfs2_xattr_header *new_xh, 6355 struct ocfs2_xattr_value_buf *vb, 6356 struct ocfs2_alloc_context *meta_ac, 6357 get_xattr_value_root *func, 6358 void *para) 6359 { 6360 int ret = 0, i, j; 6361 struct super_block *sb = args->old_inode->i_sb; 6362 struct buffer_head *value_bh; 6363 struct ocfs2_xattr_entry *xe, *last; 6364 struct ocfs2_xattr_value_root *xv, *new_xv; 6365 struct ocfs2_extent_tree data_et; 6366 u32 clusters, cpos, p_cluster, num_clusters; 6367 unsigned int ext_flags = 0; 6368 6369 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr, 6370 le16_to_cpu(xh->xh_count)); 6371 6372 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; 6373 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6374 xe = &xh->xh_entries[i]; 6375 6376 if (args->xattr_reflinked && 
!args->xattr_reflinked(xe)) { 6377 xe = &new_xh->xh_entries[j]; 6378 6379 le16_add_cpu(&new_xh->xh_count, -1); 6380 if (new_xh->xh_count) { 6381 memmove(xe, xe + 1, 6382 (void *)last - (void *)xe); 6383 memset(last, 0, 6384 sizeof(struct ocfs2_xattr_entry)); 6385 } 6386 6387 /* 6388 * We don't want j to increase in the next round since 6389 * it is already moved ahead. 6390 */ 6391 j--; 6392 continue; 6393 } 6394 6395 if (ocfs2_xattr_is_local(xe)) 6396 continue; 6397 6398 ret = func(sb, old_bh, xh, i, &xv, NULL, para); 6399 if (ret) { 6400 mlog_errno(ret); 6401 break; 6402 } 6403 6404 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); 6405 if (ret) { 6406 mlog_errno(ret); 6407 break; 6408 } 6409 6410 /* 6411 * For the xattr which has l_tree_depth = 0, all the extent 6412 * recs have already be copied to the new xh with the 6413 * propriate OCFS2_EXT_REFCOUNTED flag we just need to 6414 * increase the refount count int the refcount tree. 6415 * 6416 * For the xattr which has l_tree_depth > 0, we need 6417 * to initialize it to the empty default value root, 6418 * and then insert the extents one by one. 
6419 */ 6420 if (xv->xr_list.l_tree_depth) { 6421 memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE); 6422 vb->vb_xv = new_xv; 6423 vb->vb_bh = value_bh; 6424 ocfs2_init_xattr_value_extent_tree(&data_et, 6425 INODE_CACHE(args->new_inode), vb); 6426 } 6427 6428 clusters = le32_to_cpu(xv->xr_clusters); 6429 cpos = 0; 6430 while (cpos < clusters) { 6431 ret = ocfs2_xattr_get_clusters(args->old_inode, 6432 cpos, 6433 &p_cluster, 6434 &num_clusters, 6435 &xv->xr_list, 6436 &ext_flags); 6437 if (ret) { 6438 mlog_errno(ret); 6439 goto out; 6440 } 6441 6442 BUG_ON(!p_cluster); 6443 6444 if (xv->xr_list.l_tree_depth) { 6445 ret = ocfs2_insert_extent(handle, 6446 &data_et, cpos, 6447 ocfs2_clusters_to_blocks( 6448 args->old_inode->i_sb, 6449 p_cluster), 6450 num_clusters, ext_flags, 6451 meta_ac); 6452 if (ret) { 6453 mlog_errno(ret); 6454 goto out; 6455 } 6456 } 6457 6458 ret = ocfs2_increase_refcount(handle, args->ref_ci, 6459 args->ref_root_bh, 6460 p_cluster, num_clusters, 6461 meta_ac, args->dealloc); 6462 if (ret) { 6463 mlog_errno(ret); 6464 goto out; 6465 } 6466 6467 cpos += num_clusters; 6468 } 6469 } 6470 6471 out: 6472 return ret; 6473 } 6474 6475 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) 6476 { 6477 int ret = 0, credits = 0; 6478 handle_t *handle; 6479 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); 6480 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; 6481 int inline_size = le16_to_cpu(di->i_xattr_inline_size); 6482 int header_off = osb->sb->s_blocksize - inline_size; 6483 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *) 6484 (args->old_bh->b_data + header_off); 6485 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *) 6486 (args->new_bh->b_data + header_off); 6487 struct ocfs2_alloc_context *meta_ac = NULL; 6488 struct ocfs2_inode_info *new_oi; 6489 struct ocfs2_dinode *new_di; 6490 struct ocfs2_xattr_value_buf vb = { 6491 .vb_bh = args->new_bh, 6492 .vb_access = 
ocfs2_journal_access_di, 6493 }; 6494 6495 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6496 &credits, &meta_ac); 6497 if (ret) { 6498 mlog_errno(ret); 6499 goto out; 6500 } 6501 6502 handle = ocfs2_start_trans(osb, credits); 6503 if (IS_ERR(handle)) { 6504 ret = PTR_ERR(handle); 6505 mlog_errno(ret); 6506 goto out; 6507 } 6508 6509 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), 6510 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6511 if (ret) { 6512 mlog_errno(ret); 6513 goto out_commit; 6514 } 6515 6516 memcpy(args->new_bh->b_data + header_off, 6517 args->old_bh->b_data + header_off, inline_size); 6518 6519 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6520 new_di->i_xattr_inline_size = cpu_to_le16(inline_size); 6521 6522 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, 6523 args->new_bh, new_xh, &vb, meta_ac, 6524 ocfs2_get_xattr_value_root, NULL); 6525 if (ret) { 6526 mlog_errno(ret); 6527 goto out_commit; 6528 } 6529 6530 new_oi = OCFS2_I(args->new_inode); 6531 /* 6532 * Adjust extent record count to reserve space for extended attribute. 6533 * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). 
6534 */ 6535 if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && 6536 !(ocfs2_inode_is_fast_symlink(args->new_inode))) { 6537 struct ocfs2_extent_list *el = &new_di->id2.i_list; 6538 le16_add_cpu(&el->l_count, -(inline_size / 6539 sizeof(struct ocfs2_extent_rec))); 6540 } 6541 spin_lock(&new_oi->ip_lock); 6542 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; 6543 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6544 spin_unlock(&new_oi->ip_lock); 6545 6546 ocfs2_journal_dirty(handle, args->new_bh); 6547 6548 out_commit: 6549 ocfs2_commit_trans(osb, handle); 6550 6551 out: 6552 if (meta_ac) 6553 ocfs2_free_alloc_context(meta_ac); 6554 return ret; 6555 } 6556 6557 static int ocfs2_create_empty_xattr_block(struct inode *inode, 6558 struct buffer_head *fe_bh, 6559 struct buffer_head **ret_bh, 6560 int indexed) 6561 { 6562 int ret; 6563 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6564 struct ocfs2_xattr_set_ctxt ctxt; 6565 6566 memset(&ctxt, 0, sizeof(ctxt)); 6567 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); 6568 if (ret < 0) { 6569 mlog_errno(ret); 6570 return ret; 6571 } 6572 6573 ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); 6574 if (IS_ERR(ctxt.handle)) { 6575 ret = PTR_ERR(ctxt.handle); 6576 mlog_errno(ret); 6577 goto out; 6578 } 6579 6580 trace_ocfs2_create_empty_xattr_block( 6581 (unsigned long long)fe_bh->b_blocknr, indexed); 6582 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6583 ret_bh); 6584 if (ret) 6585 mlog_errno(ret); 6586 6587 ocfs2_commit_trans(osb, ctxt.handle); 6588 out: 6589 ocfs2_free_alloc_context(ctxt.meta_ac); 6590 return ret; 6591 } 6592 6593 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args, 6594 struct buffer_head *blk_bh, 6595 struct buffer_head *new_blk_bh) 6596 { 6597 int ret = 0, credits = 0; 6598 handle_t *handle; 6599 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode); 6600 struct ocfs2_dinode *new_di; 
6601 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb); 6602 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 6603 struct ocfs2_xattr_block *xb = 6604 (struct ocfs2_xattr_block *)blk_bh->b_data; 6605 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header; 6606 struct ocfs2_xattr_block *new_xb = 6607 (struct ocfs2_xattr_block *)new_blk_bh->b_data; 6608 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header; 6609 struct ocfs2_alloc_context *meta_ac; 6610 struct ocfs2_xattr_value_buf vb = { 6611 .vb_bh = new_blk_bh, 6612 .vb_access = ocfs2_journal_access_xb, 6613 }; 6614 6615 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6616 &credits, &meta_ac); 6617 if (ret) { 6618 mlog_errno(ret); 6619 return ret; 6620 } 6621 6622 /* One more credits in case we need to add xattr flags in new inode. */ 6623 handle = ocfs2_start_trans(osb, credits + 1); 6624 if (IS_ERR(handle)) { 6625 ret = PTR_ERR(handle); 6626 mlog_errno(ret); 6627 goto out; 6628 } 6629 6630 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6631 ret = ocfs2_journal_access_di(handle, 6632 INODE_CACHE(args->new_inode), 6633 args->new_bh, 6634 OCFS2_JOURNAL_ACCESS_WRITE); 6635 if (ret) { 6636 mlog_errno(ret); 6637 goto out_commit; 6638 } 6639 } 6640 6641 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode), 6642 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6643 if (ret) { 6644 mlog_errno(ret); 6645 goto out_commit; 6646 } 6647 6648 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off, 6649 osb->sb->s_blocksize - header_off); 6650 6651 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh, 6652 new_blk_bh, new_xh, &vb, meta_ac, 6653 ocfs2_get_xattr_value_root, NULL); 6654 if (ret) { 6655 mlog_errno(ret); 6656 goto out_commit; 6657 } 6658 6659 ocfs2_journal_dirty(handle, new_blk_bh); 6660 6661 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6662 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6663 
spin_lock(&new_oi->ip_lock); 6664 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 6665 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6666 spin_unlock(&new_oi->ip_lock); 6667 6668 ocfs2_journal_dirty(handle, args->new_bh); 6669 } 6670 6671 out_commit: 6672 ocfs2_commit_trans(osb, handle); 6673 6674 out: 6675 ocfs2_free_alloc_context(meta_ac); 6676 return ret; 6677 } 6678 6679 struct ocfs2_reflink_xattr_tree_args { 6680 struct ocfs2_xattr_reflink *reflink; 6681 struct buffer_head *old_blk_bh; 6682 struct buffer_head *new_blk_bh; 6683 struct ocfs2_xattr_bucket *old_bucket; 6684 struct ocfs2_xattr_bucket *new_bucket; 6685 }; 6686 6687 /* 6688 * NOTE: 6689 * We have to handle the case that both old bucket and new bucket 6690 * will call this function to get the right ret_bh. 6691 * So The caller must give us the right bh. 6692 */ 6693 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb, 6694 struct buffer_head *bh, 6695 struct ocfs2_xattr_header *xh, 6696 int offset, 6697 struct ocfs2_xattr_value_root **xv, 6698 struct buffer_head **ret_bh, 6699 void *para) 6700 { 6701 struct ocfs2_reflink_xattr_tree_args *args = 6702 (struct ocfs2_reflink_xattr_tree_args *)para; 6703 struct ocfs2_xattr_bucket *bucket; 6704 6705 if (bh == args->old_bucket->bu_bhs[0]) 6706 bucket = args->old_bucket; 6707 else 6708 bucket = args->new_bucket; 6709 6710 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6711 xv, ret_bh); 6712 } 6713 6714 struct ocfs2_value_tree_metas { 6715 int num_metas; 6716 int credits; 6717 int num_recs; 6718 }; 6719 6720 static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb, 6721 struct buffer_head *bh, 6722 struct ocfs2_xattr_header *xh, 6723 int offset, 6724 struct ocfs2_xattr_value_root **xv, 6725 struct buffer_head **ret_bh, 6726 void *para) 6727 { 6728 struct ocfs2_xattr_bucket *bucket = 6729 (struct ocfs2_xattr_bucket *)para; 6730 6731 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6732 xv, 
ret_bh); 6733 } 6734 6735 static int ocfs2_calc_value_tree_metas(struct inode *inode, 6736 struct ocfs2_xattr_bucket *bucket, 6737 void *para) 6738 { 6739 struct ocfs2_value_tree_metas *metas = 6740 (struct ocfs2_value_tree_metas *)para; 6741 struct ocfs2_xattr_header *xh = 6742 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6743 6744 /* Add the credits for this bucket first. */ 6745 metas->credits += bucket->bu_blocks; 6746 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0], 6747 xh, &metas->num_metas, 6748 &metas->credits, &metas->num_recs, 6749 ocfs2_value_tree_metas_in_bucket, 6750 bucket); 6751 } 6752 6753 /* 6754 * Given a xattr extent rec starting from blkno and having len clusters, 6755 * iterate all the buckets calculate how much metadata we need for reflinking 6756 * all the ocfs2_xattr_value_root and lock the allocators accordingly. 6757 */ 6758 static int ocfs2_lock_reflink_xattr_rec_allocators( 6759 struct ocfs2_reflink_xattr_tree_args *args, 6760 struct ocfs2_extent_tree *xt_et, 6761 u64 blkno, u32 len, int *credits, 6762 struct ocfs2_alloc_context **meta_ac, 6763 struct ocfs2_alloc_context **data_ac) 6764 { 6765 int ret, num_free_extents; 6766 struct ocfs2_value_tree_metas metas; 6767 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb); 6768 struct ocfs2_refcount_block *rb; 6769 6770 memset(&metas, 0, sizeof(metas)); 6771 6772 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len, 6773 ocfs2_calc_value_tree_metas, &metas); 6774 if (ret) { 6775 mlog_errno(ret); 6776 goto out; 6777 } 6778 6779 *credits = metas.credits; 6780 6781 /* 6782 * Calculate we need for refcount tree change. 6783 * 6784 * We need to add/modify num_recs in refcount tree, so just calculate 6785 * an approximate number we need for refcount tree change. 6786 * Sometimes we need to split the tree, and after split, half recs 6787 * will be moved to the new block, and a new block can only provide 6788 * half number of recs. 
So we multiple new blocks by 2. 6789 * In the end, we have to add credits for modifying the already 6790 * existed refcount block. 6791 */ 6792 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data; 6793 metas.num_recs = 6794 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) / 6795 ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6796 metas.num_metas += metas.num_recs; 6797 *credits += metas.num_recs + 6798 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6799 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6800 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6801 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6802 else 6803 *credits += 1; 6804 6805 /* count in the xattr tree change. */ 6806 num_free_extents = ocfs2_num_free_extents(xt_et); 6807 if (num_free_extents < 0) { 6808 ret = num_free_extents; 6809 mlog_errno(ret); 6810 goto out; 6811 } 6812 6813 if (num_free_extents < len) 6814 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); 6815 6816 *credits += ocfs2_calc_extend_credits(osb->sb, 6817 xt_et->et_root_el); 6818 6819 if (metas.num_metas) { 6820 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, 6821 meta_ac); 6822 if (ret) { 6823 mlog_errno(ret); 6824 goto out; 6825 } 6826 } 6827 6828 if (len) { 6829 ret = ocfs2_reserve_clusters(osb, len, data_ac); 6830 if (ret) 6831 mlog_errno(ret); 6832 } 6833 out: 6834 if (ret) { 6835 if (*meta_ac) { 6836 ocfs2_free_alloc_context(*meta_ac); 6837 *meta_ac = NULL; 6838 } 6839 } 6840 6841 return ret; 6842 } 6843 6844 static int ocfs2_reflink_xattr_bucket(handle_t *handle, 6845 u64 blkno, u64 new_blkno, u32 clusters, 6846 u32 *cpos, int num_buckets, 6847 struct ocfs2_alloc_context *meta_ac, 6848 struct ocfs2_alloc_context *data_ac, 6849 struct ocfs2_reflink_xattr_tree_args *args) 6850 { 6851 int i, j, ret = 0; 6852 struct super_block *sb = args->reflink->old_inode->i_sb; 6853 int bpb = args->old_bucket->bu_blocks; 6854 struct ocfs2_xattr_value_buf vb = { 6855 
.vb_access = ocfs2_journal_access, 6856 }; 6857 6858 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) { 6859 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6860 if (ret) { 6861 mlog_errno(ret); 6862 break; 6863 } 6864 6865 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1); 6866 if (ret) { 6867 mlog_errno(ret); 6868 break; 6869 } 6870 6871 ret = ocfs2_xattr_bucket_journal_access(handle, 6872 args->new_bucket, 6873 OCFS2_JOURNAL_ACCESS_CREATE); 6874 if (ret) { 6875 mlog_errno(ret); 6876 break; 6877 } 6878 6879 for (j = 0; j < bpb; j++) 6880 memcpy(bucket_block(args->new_bucket, j), 6881 bucket_block(args->old_bucket, j), 6882 sb->s_blocksize); 6883 6884 /* 6885 * Record the start cpos so that we can use it to initialize 6886 * our xattr tree we also set the xh_num_bucket for the new 6887 * bucket. 6888 */ 6889 if (i == 0) { 6890 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> 6891 xh_entries[0].xe_name_hash); 6892 bucket_xh(args->new_bucket)->xh_num_buckets = 6893 cpu_to_le16(num_buckets); 6894 } 6895 6896 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6897 6898 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6899 args->old_bucket->bu_bhs[0], 6900 bucket_xh(args->old_bucket), 6901 args->new_bucket->bu_bhs[0], 6902 bucket_xh(args->new_bucket), 6903 &vb, meta_ac, 6904 ocfs2_get_reflink_xattr_value_root, 6905 args); 6906 if (ret) { 6907 mlog_errno(ret); 6908 break; 6909 } 6910 6911 /* 6912 * Re-access and dirty the bucket to calculate metaecc. 6913 * Because we may extend the transaction in reflink_xattr_header 6914 * which will let the already accessed block gone. 
6915 */ 6916 ret = ocfs2_xattr_bucket_journal_access(handle, 6917 args->new_bucket, 6918 OCFS2_JOURNAL_ACCESS_WRITE); 6919 if (ret) { 6920 mlog_errno(ret); 6921 break; 6922 } 6923 6924 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6925 6926 ocfs2_xattr_bucket_relse(args->old_bucket); 6927 ocfs2_xattr_bucket_relse(args->new_bucket); 6928 } 6929 6930 ocfs2_xattr_bucket_relse(args->old_bucket); 6931 ocfs2_xattr_bucket_relse(args->new_bucket); 6932 return ret; 6933 } 6934 6935 static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6936 struct inode *inode, 6937 struct ocfs2_reflink_xattr_tree_args *args, 6938 struct ocfs2_extent_tree *et, 6939 struct ocfs2_alloc_context *meta_ac, 6940 struct ocfs2_alloc_context *data_ac, 6941 u64 blkno, u32 cpos, u32 len) 6942 { 6943 int ret, first_inserted = 0; 6944 u32 p_cluster, num_clusters, reflink_cpos = 0; 6945 u64 new_blkno; 6946 unsigned int num_buckets, reflink_buckets; 6947 unsigned int bpc = 6948 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 6949 6950 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6951 if (ret) { 6952 mlog_errno(ret); 6953 goto out; 6954 } 6955 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); 6956 ocfs2_xattr_bucket_relse(args->old_bucket); 6957 6958 while (len && num_buckets) { 6959 ret = ocfs2_claim_clusters(handle, data_ac, 6960 1, &p_cluster, &num_clusters); 6961 if (ret) { 6962 mlog_errno(ret); 6963 goto out; 6964 } 6965 6966 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 6967 reflink_buckets = min(num_buckets, bpc * num_clusters); 6968 6969 ret = ocfs2_reflink_xattr_bucket(handle, blkno, 6970 new_blkno, num_clusters, 6971 &reflink_cpos, reflink_buckets, 6972 meta_ac, data_ac, args); 6973 if (ret) { 6974 mlog_errno(ret); 6975 goto out; 6976 } 6977 6978 /* 6979 * For the 1st allocated cluster, we make it use the same cpos 6980 * so that the xattr tree looks the same as the original one 6981 * in the most case. 
6982 */ 6983 if (!first_inserted) { 6984 reflink_cpos = cpos; 6985 first_inserted = 1; 6986 } 6987 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 6988 num_clusters, 0, meta_ac); 6989 if (ret) 6990 mlog_errno(ret); 6991 6992 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno, 6993 num_clusters, reflink_cpos); 6994 6995 len -= num_clusters; 6996 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6997 num_buckets -= reflink_buckets; 6998 } 6999 out: 7000 return ret; 7001 } 7002 7003 /* 7004 * Create the same xattr extent record in the new inode's xattr tree. 7005 */ 7006 static int ocfs2_reflink_xattr_rec(struct inode *inode, 7007 struct buffer_head *root_bh, 7008 u64 blkno, 7009 u32 cpos, 7010 u32 len, 7011 void *para) 7012 { 7013 int ret, credits = 0; 7014 handle_t *handle; 7015 struct ocfs2_reflink_xattr_tree_args *args = 7016 (struct ocfs2_reflink_xattr_tree_args *)para; 7017 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7018 struct ocfs2_alloc_context *meta_ac = NULL; 7019 struct ocfs2_alloc_context *data_ac = NULL; 7020 struct ocfs2_extent_tree et; 7021 7022 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len); 7023 7024 ocfs2_init_xattr_tree_extent_tree(&et, 7025 INODE_CACHE(args->reflink->new_inode), 7026 args->new_blk_bh); 7027 7028 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 7029 len, &credits, 7030 &meta_ac, &data_ac); 7031 if (ret) { 7032 mlog_errno(ret); 7033 goto out; 7034 } 7035 7036 handle = ocfs2_start_trans(osb, credits); 7037 if (IS_ERR(handle)) { 7038 ret = PTR_ERR(handle); 7039 mlog_errno(ret); 7040 goto out; 7041 } 7042 7043 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7044 meta_ac, data_ac, 7045 blkno, cpos, len); 7046 if (ret) 7047 mlog_errno(ret); 7048 7049 ocfs2_commit_trans(osb, handle); 7050 7051 out: 7052 if (meta_ac) 7053 ocfs2_free_alloc_context(meta_ac); 7054 if (data_ac) 7055 ocfs2_free_alloc_context(data_ac); 7056 return ret; 7057 } 7058 7059 /* 
7060 * Create reflinked xattr buckets. 7061 * We will add bucket one by one, and refcount all the xattrs in the bucket 7062 * if they are stored outside. 7063 */ 7064 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7065 struct buffer_head *blk_bh, 7066 struct buffer_head *new_blk_bh) 7067 { 7068 int ret; 7069 struct ocfs2_reflink_xattr_tree_args para; 7070 7071 memset(¶, 0, sizeof(para)); 7072 para.reflink = args; 7073 para.old_blk_bh = blk_bh; 7074 para.new_blk_bh = new_blk_bh; 7075 7076 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7077 if (!para.old_bucket) { 7078 mlog_errno(-ENOMEM); 7079 return -ENOMEM; 7080 } 7081 7082 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7083 if (!para.new_bucket) { 7084 ret = -ENOMEM; 7085 mlog_errno(ret); 7086 goto out; 7087 } 7088 7089 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7090 ocfs2_reflink_xattr_rec, 7091 ¶); 7092 if (ret) 7093 mlog_errno(ret); 7094 7095 out: 7096 ocfs2_xattr_bucket_free(para.old_bucket); 7097 ocfs2_xattr_bucket_free(para.new_bucket); 7098 return ret; 7099 } 7100 7101 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7102 struct buffer_head *blk_bh) 7103 { 7104 int ret, indexed = 0; 7105 struct buffer_head *new_blk_bh = NULL; 7106 struct ocfs2_xattr_block *xb = 7107 (struct ocfs2_xattr_block *)blk_bh->b_data; 7108 7109 7110 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7111 indexed = 1; 7112 7113 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7114 &new_blk_bh, indexed); 7115 if (ret) { 7116 mlog_errno(ret); 7117 goto out; 7118 } 7119 7120 if (!indexed) 7121 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7122 else 7123 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7124 if (ret) 7125 mlog_errno(ret); 7126 7127 out: 7128 brelse(new_blk_bh); 7129 return ret; 7130 } 7131 7132 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7133 { 7134 int type = 
ocfs2_xattr_get_type(xe); 7135 7136 return type != OCFS2_XATTR_INDEX_SECURITY && 7137 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && 7138 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; 7139 } 7140 7141 int ocfs2_reflink_xattrs(struct inode *old_inode, 7142 struct buffer_head *old_bh, 7143 struct inode *new_inode, 7144 struct buffer_head *new_bh, 7145 bool preserve_security) 7146 { 7147 int ret; 7148 struct ocfs2_xattr_reflink args; 7149 struct ocfs2_inode_info *oi = OCFS2_I(old_inode); 7150 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; 7151 struct buffer_head *blk_bh = NULL; 7152 struct ocfs2_cached_dealloc_ctxt dealloc; 7153 struct ocfs2_refcount_tree *ref_tree; 7154 struct buffer_head *ref_root_bh = NULL; 7155 7156 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7157 le64_to_cpu(di->i_refcount_loc), 7158 1, &ref_tree, &ref_root_bh); 7159 if (ret) { 7160 mlog_errno(ret); 7161 goto out; 7162 } 7163 7164 ocfs2_init_dealloc_ctxt(&dealloc); 7165 7166 args.old_inode = old_inode; 7167 args.new_inode = new_inode; 7168 args.old_bh = old_bh; 7169 args.new_bh = new_bh; 7170 args.ref_ci = &ref_tree->rf_ci; 7171 args.ref_root_bh = ref_root_bh; 7172 args.dealloc = &dealloc; 7173 if (preserve_security) 7174 args.xattr_reflinked = NULL; 7175 else 7176 args.xattr_reflinked = ocfs2_reflink_xattr_no_security; 7177 7178 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 7179 ret = ocfs2_reflink_xattr_inline(&args); 7180 if (ret) { 7181 mlog_errno(ret); 7182 goto out_unlock; 7183 } 7184 } 7185 7186 if (!di->i_xattr_loc) 7187 goto out_unlock; 7188 7189 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), 7190 &blk_bh); 7191 if (ret < 0) { 7192 mlog_errno(ret); 7193 goto out_unlock; 7194 } 7195 7196 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); 7197 if (ret) 7198 mlog_errno(ret); 7199 7200 brelse(blk_bh); 7201 7202 out_unlock: 7203 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7204 ref_tree, 1); 7205 brelse(ref_root_bh); 7206 7207 
if (ocfs2_dealloc_has_cluster(&dealloc)) { 7208 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); 7209 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); 7210 } 7211 7212 out: 7213 return ret; 7214 } 7215 7216 /* 7217 * Initialize security and acl for a already created inode. 7218 * Used for reflink a non-preserve-security file. 7219 * 7220 * It uses common api like ocfs2_xattr_set, so the caller 7221 * must not hold any lock expect i_mutex. 7222 */ 7223 int ocfs2_init_security_and_acl(struct inode *dir, 7224 struct inode *inode, 7225 const struct qstr *qstr) 7226 { 7227 int ret = 0; 7228 struct buffer_head *dir_bh = NULL; 7229 7230 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7231 if (ret) { 7232 mlog_errno(ret); 7233 goto leave; 7234 } 7235 7236 ret = ocfs2_inode_lock(dir, &dir_bh, 0); 7237 if (ret) { 7238 mlog_errno(ret); 7239 goto leave; 7240 } 7241 ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); 7242 if (ret) 7243 mlog_errno(ret); 7244 7245 ocfs2_inode_unlock(dir, 0); 7246 brelse(dir_bh); 7247 leave: 7248 return ret; 7249 } 7250 7251 /* 7252 * 'security' attributes support 7253 */ 7254 static int ocfs2_xattr_security_get(const struct xattr_handler *handler, 7255 struct dentry *unused, struct inode *inode, 7256 const char *name, void *buffer, size_t size) 7257 { 7258 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, 7259 name, buffer, size); 7260 } 7261 7262 static int ocfs2_xattr_security_set(const struct xattr_handler *handler, 7263 struct dentry *unused, struct inode *inode, 7264 const char *name, const void *value, 7265 size_t size, int flags) 7266 { 7267 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7268 name, value, size, flags); 7269 } 7270 7271 static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 7272 void *fs_info) 7273 { 7274 const struct xattr *xattr; 7275 int err = 0; 7276 7277 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 7278 err = 
ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7279 xattr->name, xattr->value, 7280 xattr->value_len, XATTR_CREATE); 7281 if (err) 7282 break; 7283 } 7284 return err; 7285 } 7286 7287 int ocfs2_init_security_get(struct inode *inode, 7288 struct inode *dir, 7289 const struct qstr *qstr, 7290 struct ocfs2_security_xattr_info *si) 7291 { 7292 /* check whether ocfs2 support feature xattr */ 7293 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7294 return -EOPNOTSUPP; 7295 if (si) 7296 return security_old_inode_init_security(inode, dir, qstr, 7297 &si->name, &si->value, 7298 &si->value_len); 7299 7300 return security_inode_init_security(inode, dir, qstr, 7301 &ocfs2_initxattrs, NULL); 7302 } 7303 7304 int ocfs2_init_security_set(handle_t *handle, 7305 struct inode *inode, 7306 struct buffer_head *di_bh, 7307 struct ocfs2_security_xattr_info *si, 7308 struct ocfs2_alloc_context *xattr_ac, 7309 struct ocfs2_alloc_context *data_ac) 7310 { 7311 return ocfs2_xattr_set_handle(handle, inode, di_bh, 7312 OCFS2_XATTR_INDEX_SECURITY, 7313 si->name, si->value, si->value_len, 0, 7314 xattr_ac, data_ac); 7315 } 7316 7317 const struct xattr_handler ocfs2_xattr_security_handler = { 7318 .prefix = XATTR_SECURITY_PREFIX, 7319 .get = ocfs2_xattr_security_get, 7320 .set = ocfs2_xattr_security_set, 7321 }; 7322 7323 /* 7324 * 'trusted' attributes support 7325 */ 7326 static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, 7327 struct dentry *unused, struct inode *inode, 7328 const char *name, void *buffer, size_t size) 7329 { 7330 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, 7331 name, buffer, size); 7332 } 7333 7334 static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler, 7335 struct dentry *unused, struct inode *inode, 7336 const char *name, const void *value, 7337 size_t size, int flags) 7338 { 7339 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, 7340 name, value, size, flags); 7341 } 7342 7343 const struct xattr_handler 
ocfs2_xattr_trusted_handler = { 7344 .prefix = XATTR_TRUSTED_PREFIX, 7345 .get = ocfs2_xattr_trusted_get, 7346 .set = ocfs2_xattr_trusted_set, 7347 }; 7348 7349 /* 7350 * 'user' attributes support 7351 */ 7352 static int ocfs2_xattr_user_get(const struct xattr_handler *handler, 7353 struct dentry *unused, struct inode *inode, 7354 const char *name, void *buffer, size_t size) 7355 { 7356 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7357 7358 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7359 return -EOPNOTSUPP; 7360 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, 7361 buffer, size); 7362 } 7363 7364 static int ocfs2_xattr_user_set(const struct xattr_handler *handler, 7365 struct dentry *unused, struct inode *inode, 7366 const char *name, const void *value, 7367 size_t size, int flags) 7368 { 7369 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7370 7371 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7372 return -EOPNOTSUPP; 7373 7374 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, 7375 name, value, size, flags); 7376 } 7377 7378 const struct xattr_handler ocfs2_xattr_user_handler = { 7379 .prefix = XATTR_USER_PREFIX, 7380 .get = ocfs2_xattr_user_get, 7381 .set = ocfs2_xattr_user_set, 7382 }; 7383