1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * xattr.c 5 * 6 * Copyright (C) 2004, 2008 Oracle. All rights reserved. 7 * 8 * CREDITS: 9 * Lots of code in this file is copy from linux/fs/ext3/xattr.c. 10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public 14 * License version 2 as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 */ 21 22 #include <linux/capability.h> 23 #include <linux/fs.h> 24 #include <linux/types.h> 25 #include <linux/slab.h> 26 #include <linux/highmem.h> 27 #include <linux/pagemap.h> 28 #include <linux/uio.h> 29 #include <linux/sched.h> 30 #include <linux/splice.h> 31 #include <linux/mount.h> 32 #include <linux/writeback.h> 33 #include <linux/falloc.h> 34 #include <linux/sort.h> 35 #include <linux/init.h> 36 #include <linux/module.h> 37 #include <linux/string.h> 38 #include <linux/security.h> 39 40 #include <cluster/masklog.h> 41 42 #include "ocfs2.h" 43 #include "alloc.h" 44 #include "blockcheck.h" 45 #include "dlmglue.h" 46 #include "file.h" 47 #include "symlink.h" 48 #include "sysfile.h" 49 #include "inode.h" 50 #include "journal.h" 51 #include "ocfs2_fs.h" 52 #include "suballoc.h" 53 #include "uptodate.h" 54 #include "buffer_head_io.h" 55 #include "super.h" 56 #include "xattr.h" 57 #include "refcounttree.h" 58 #include "acl.h" 59 #include "ocfs2_trace.h" 60 61 struct ocfs2_xattr_def_value_root { 62 struct ocfs2_xattr_value_root xv; 63 struct ocfs2_extent_rec er; 64 }; 65 66 struct ocfs2_xattr_bucket { 67 /* The inode these xattrs are associated with */ 68 struct inode *bu_inode; 69 70 /* The actual buffers that 
make up the bucket */ 71 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 72 73 /* How many blocks make up one bucket for this filesystem */ 74 int bu_blocks; 75 }; 76 77 struct ocfs2_xattr_set_ctxt { 78 handle_t *handle; 79 struct ocfs2_alloc_context *meta_ac; 80 struct ocfs2_alloc_context *data_ac; 81 struct ocfs2_cached_dealloc_ctxt dealloc; 82 int set_abort; 83 }; 84 85 #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 86 #define OCFS2_XATTR_INLINE_SIZE 80 87 #define OCFS2_XATTR_HEADER_GAP 4 88 #define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 89 - sizeof(struct ocfs2_xattr_header) \ 90 - OCFS2_XATTR_HEADER_GAP) 91 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 92 - sizeof(struct ocfs2_xattr_block) \ 93 - sizeof(struct ocfs2_xattr_header) \ 94 - OCFS2_XATTR_HEADER_GAP) 95 96 static struct ocfs2_xattr_def_value_root def_xv = { 97 .xv.xr_list.l_count = cpu_to_le16(1), 98 }; 99 100 const struct xattr_handler *ocfs2_xattr_handlers[] = { 101 &ocfs2_xattr_user_handler, 102 &posix_acl_access_xattr_handler, 103 &posix_acl_default_xattr_handler, 104 &ocfs2_xattr_trusted_handler, 105 &ocfs2_xattr_security_handler, 106 NULL 107 }; 108 109 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 110 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 111 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 112 = &posix_acl_access_xattr_handler, 113 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 114 = &posix_acl_default_xattr_handler, 115 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 116 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 117 }; 118 119 struct ocfs2_xattr_info { 120 int xi_name_index; 121 const char *xi_name; 122 int xi_name_len; 123 const void *xi_value; 124 size_t xi_value_len; 125 }; 126 127 struct ocfs2_xattr_search { 128 struct buffer_head *inode_bh; 129 /* 130 * xattr_bh point to the block buffer head which has extended attribute 131 * when extended 
attribute in inode, xattr_bh is equal to inode_bh. 132 */ 133 struct buffer_head *xattr_bh; 134 struct ocfs2_xattr_header *header; 135 struct ocfs2_xattr_bucket *bucket; 136 void *base; 137 void *end; 138 struct ocfs2_xattr_entry *here; 139 int not_found; 140 }; 141 142 /* Operations on struct ocfs2_xa_entry */ 143 struct ocfs2_xa_loc; 144 struct ocfs2_xa_loc_operations { 145 /* 146 * Journal functions 147 */ 148 int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, 149 int type); 150 void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); 151 152 /* 153 * Return a pointer to the appropriate buffer in loc->xl_storage 154 * at the given offset from loc->xl_header. 155 */ 156 void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); 157 158 /* Can we reuse the existing entry for the new value? */ 159 int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, 160 struct ocfs2_xattr_info *xi); 161 162 /* How much space is needed for the new value? */ 163 int (*xlo_check_space)(struct ocfs2_xa_loc *loc, 164 struct ocfs2_xattr_info *xi); 165 166 /* 167 * Return the offset of the first name+value pair. This is 168 * the start of our downward-filling free space. 169 */ 170 int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); 171 172 /* 173 * Remove the name+value at this location. Do whatever is 174 * appropriate with the remaining name+value pairs. 175 */ 176 void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); 177 178 /* Fill xl_entry with a new entry */ 179 void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); 180 181 /* Add name+value storage to an entry */ 182 void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); 183 184 /* 185 * Initialize the value buf's access and bh fields for this entry. 186 * ocfs2_xa_fill_value_buf() will handle the xv pointer. 187 */ 188 void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, 189 struct ocfs2_xattr_value_buf *vb); 190 }; 191 192 /* 193 * Describes an xattr entry location. 
This is a memory structure 194 * tracking the on-disk structure. 195 */ 196 struct ocfs2_xa_loc { 197 /* This xattr belongs to this inode */ 198 struct inode *xl_inode; 199 200 /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ 201 struct ocfs2_xattr_header *xl_header; 202 203 /* Bytes from xl_header to the end of the storage */ 204 int xl_size; 205 206 /* 207 * The ocfs2_xattr_entry this location describes. If this is 208 * NULL, this location describes the on-disk structure where it 209 * would have been. 210 */ 211 struct ocfs2_xattr_entry *xl_entry; 212 213 /* 214 * Internal housekeeping 215 */ 216 217 /* Buffer(s) containing this entry */ 218 void *xl_storage; 219 220 /* Operations on the storage backing this location */ 221 const struct ocfs2_xa_loc_operations *xl_ops; 222 }; 223 224 /* 225 * Convenience functions to calculate how much space is needed for a 226 * given name+value pair 227 */ 228 static int namevalue_size(int name_len, uint64_t value_len) 229 { 230 if (value_len > OCFS2_XATTR_INLINE_SIZE) 231 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 232 else 233 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); 234 } 235 236 static int namevalue_size_xi(struct ocfs2_xattr_info *xi) 237 { 238 return namevalue_size(xi->xi_name_len, xi->xi_value_len); 239 } 240 241 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) 242 { 243 u64 value_len = le64_to_cpu(xe->xe_value_size); 244 245 BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && 246 ocfs2_xattr_is_local(xe)); 247 return namevalue_size(xe->xe_name_len, value_len); 248 } 249 250 251 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 252 struct ocfs2_xattr_header *xh, 253 int index, 254 int *block_off, 255 int *new_offset); 256 257 static int ocfs2_xattr_block_find(struct inode *inode, 258 int name_index, 259 const char *name, 260 struct ocfs2_xattr_search *xs); 261 static int ocfs2_xattr_index_block_find(struct inode *inode, 262 struct 
buffer_head *root_bh, 263 int name_index, 264 const char *name, 265 struct ocfs2_xattr_search *xs); 266 267 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 268 struct buffer_head *blk_bh, 269 char *buffer, 270 size_t buffer_size); 271 272 static int ocfs2_xattr_create_index_block(struct inode *inode, 273 struct ocfs2_xattr_search *xs, 274 struct ocfs2_xattr_set_ctxt *ctxt); 275 276 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 277 struct ocfs2_xattr_info *xi, 278 struct ocfs2_xattr_search *xs, 279 struct ocfs2_xattr_set_ctxt *ctxt); 280 281 typedef int (xattr_tree_rec_func)(struct inode *inode, 282 struct buffer_head *root_bh, 283 u64 blkno, u32 cpos, u32 len, void *para); 284 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 285 struct buffer_head *root_bh, 286 xattr_tree_rec_func *rec_func, 287 void *para); 288 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 289 struct ocfs2_xattr_bucket *bucket, 290 void *para); 291 static int ocfs2_rm_xattr_cluster(struct inode *inode, 292 struct buffer_head *root_bh, 293 u64 blkno, 294 u32 cpos, 295 u32 len, 296 void *para); 297 298 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 299 u64 src_blk, u64 last_blk, u64 to_blk, 300 unsigned int start_bucket, 301 u32 *first_hash); 302 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 303 struct ocfs2_dinode *di, 304 struct ocfs2_xattr_info *xi, 305 struct ocfs2_xattr_search *xis, 306 struct ocfs2_xattr_search *xbs, 307 struct ocfs2_refcount_tree **ref_tree, 308 int *meta_need, 309 int *credits); 310 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 311 struct ocfs2_xattr_bucket *bucket, 312 int offset, 313 struct ocfs2_xattr_value_root **xv, 314 struct buffer_head **bh); 315 316 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 317 { 318 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 319 } 320 321 static inline u16 
ocfs2_blocks_per_xattr_bucket(struct super_block *sb) 322 { 323 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 324 } 325 326 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) 327 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) 328 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) 329 330 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) 331 { 332 struct ocfs2_xattr_bucket *bucket; 333 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 334 335 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); 336 337 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); 338 if (bucket) { 339 bucket->bu_inode = inode; 340 bucket->bu_blocks = blks; 341 } 342 343 return bucket; 344 } 345 346 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) 347 { 348 int i; 349 350 for (i = 0; i < bucket->bu_blocks; i++) { 351 brelse(bucket->bu_bhs[i]); 352 bucket->bu_bhs[i] = NULL; 353 } 354 } 355 356 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) 357 { 358 if (bucket) { 359 ocfs2_xattr_bucket_relse(bucket); 360 bucket->bu_inode = NULL; 361 kfree(bucket); 362 } 363 } 364 365 /* 366 * A bucket that has never been written to disk doesn't need to be 367 * read. We just need the buffer_heads. Don't call this for 368 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes 369 * them fully. 
370 */ 371 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 372 u64 xb_blkno, int new) 373 { 374 int i, rc = 0; 375 376 for (i = 0; i < bucket->bu_blocks; i++) { 377 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, 378 xb_blkno + i); 379 if (!bucket->bu_bhs[i]) { 380 rc = -ENOMEM; 381 mlog_errno(rc); 382 break; 383 } 384 385 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 386 bucket->bu_bhs[i])) { 387 if (new) 388 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 389 bucket->bu_bhs[i]); 390 else { 391 set_buffer_uptodate(bucket->bu_bhs[i]); 392 ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 393 bucket->bu_bhs[i]); 394 } 395 } 396 } 397 398 if (rc) 399 ocfs2_xattr_bucket_relse(bucket); 400 return rc; 401 } 402 403 /* Read the xattr bucket at xb_blkno */ 404 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 405 u64 xb_blkno) 406 { 407 int rc; 408 409 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, 410 bucket->bu_blocks, bucket->bu_bhs, 0, 411 NULL); 412 if (!rc) { 413 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 414 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 415 bucket->bu_bhs, 416 bucket->bu_blocks, 417 &bucket_xh(bucket)->xh_check); 418 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 419 if (rc) 420 mlog_errno(rc); 421 } 422 423 if (rc) 424 ocfs2_xattr_bucket_relse(bucket); 425 return rc; 426 } 427 428 static int ocfs2_xattr_bucket_journal_access(handle_t *handle, 429 struct ocfs2_xattr_bucket *bucket, 430 int type) 431 { 432 int i, rc = 0; 433 434 for (i = 0; i < bucket->bu_blocks; i++) { 435 rc = ocfs2_journal_access(handle, 436 INODE_CACHE(bucket->bu_inode), 437 bucket->bu_bhs[i], type); 438 if (rc) { 439 mlog_errno(rc); 440 break; 441 } 442 } 443 444 return rc; 445 } 446 447 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, 448 struct ocfs2_xattr_bucket *bucket) 449 { 450 int i; 451 452 
spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 453 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 454 bucket->bu_bhs, bucket->bu_blocks, 455 &bucket_xh(bucket)->xh_check); 456 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock); 457 458 for (i = 0; i < bucket->bu_blocks; i++) 459 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 460 } 461 462 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, 463 struct ocfs2_xattr_bucket *src) 464 { 465 int i; 466 int blocksize = src->bu_inode->i_sb->s_blocksize; 467 468 BUG_ON(dest->bu_blocks != src->bu_blocks); 469 BUG_ON(dest->bu_inode != src->bu_inode); 470 471 for (i = 0; i < src->bu_blocks; i++) { 472 memcpy(bucket_block(dest, i), bucket_block(src, i), 473 blocksize); 474 } 475 } 476 477 static int ocfs2_validate_xattr_block(struct super_block *sb, 478 struct buffer_head *bh) 479 { 480 int rc; 481 struct ocfs2_xattr_block *xb = 482 (struct ocfs2_xattr_block *)bh->b_data; 483 484 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr); 485 486 BUG_ON(!buffer_uptodate(bh)); 487 488 /* 489 * If the ecc fails, we return the error but otherwise 490 * leave the filesystem running. We know any error is 491 * local to this block. 
492 */ 493 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); 494 if (rc) 495 return rc; 496 497 /* 498 * Errors after here are fatal 499 */ 500 501 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { 502 return ocfs2_error(sb, 503 "Extended attribute block #%llu has bad signature %.*s\n", 504 (unsigned long long)bh->b_blocknr, 7, 505 xb->xb_signature); 506 } 507 508 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { 509 return ocfs2_error(sb, 510 "Extended attribute block #%llu has an invalid xb_blkno of %llu\n", 511 (unsigned long long)bh->b_blocknr, 512 (unsigned long long)le64_to_cpu(xb->xb_blkno)); 513 } 514 515 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { 516 return ocfs2_error(sb, 517 "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n", 518 (unsigned long long)bh->b_blocknr, 519 le32_to_cpu(xb->xb_fs_generation)); 520 } 521 522 return 0; 523 } 524 525 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, 526 struct buffer_head **bh) 527 { 528 int rc; 529 struct buffer_head *tmp = *bh; 530 531 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, 532 ocfs2_validate_xattr_block); 533 534 /* If ocfs2_read_block() got us a new bh, pass it up. */ 535 if (!rc && !*bh) 536 *bh = tmp; 537 538 return rc; 539 } 540 541 static inline const char *ocfs2_xattr_prefix(int name_index) 542 { 543 const struct xattr_handler *handler = NULL; 544 545 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 546 handler = ocfs2_xattr_handler_map[name_index]; 547 return handler ? 
xattr_prefix(handler) : NULL; 548 } 549 550 static u32 ocfs2_xattr_name_hash(struct inode *inode, 551 const char *name, 552 int name_len) 553 { 554 /* Get hash value of uuid from super block */ 555 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; 556 int i; 557 558 /* hash extended attribute name */ 559 for (i = 0; i < name_len; i++) { 560 hash = (hash << OCFS2_HASH_SHIFT) ^ 561 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ 562 *name++; 563 } 564 565 return hash; 566 } 567 568 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) 569 { 570 return namevalue_size(name_len, value_len) + 571 sizeof(struct ocfs2_xattr_entry); 572 } 573 574 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) 575 { 576 return namevalue_size_xi(xi) + 577 sizeof(struct ocfs2_xattr_entry); 578 } 579 580 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) 581 { 582 return namevalue_size_xe(xe) + 583 sizeof(struct ocfs2_xattr_entry); 584 } 585 586 int ocfs2_calc_security_init(struct inode *dir, 587 struct ocfs2_security_xattr_info *si, 588 int *want_clusters, 589 int *xattr_credits, 590 struct ocfs2_alloc_context **xattr_ac) 591 { 592 int ret = 0; 593 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 594 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 595 si->value_len); 596 597 /* 598 * The max space of security xattr taken inline is 599 * 256(name) + 80(value) + 16(entry) = 352 bytes, 600 * So reserve one metadata block for it is ok. 
601 */ 602 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 603 s_size > OCFS2_XATTR_FREE_IN_IBODY) { 604 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 605 if (ret) { 606 mlog_errno(ret); 607 return ret; 608 } 609 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 610 } 611 612 /* reserve clusters for xattr value which will be set in B tree*/ 613 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) { 614 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 615 si->value_len); 616 617 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 618 new_clusters); 619 *want_clusters += new_clusters; 620 } 621 return ret; 622 } 623 624 int ocfs2_calc_xattr_init(struct inode *dir, 625 struct buffer_head *dir_bh, 626 umode_t mode, 627 struct ocfs2_security_xattr_info *si, 628 int *want_clusters, 629 int *xattr_credits, 630 int *want_meta) 631 { 632 int ret = 0; 633 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 634 int s_size = 0, a_size = 0, acl_len = 0, new_clusters; 635 636 if (si->enable) 637 s_size = ocfs2_xattr_entry_real_size(strlen(si->name), 638 si->value_len); 639 640 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 641 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 642 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 643 "", NULL, 0); 644 if (acl_len > 0) { 645 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 646 if (S_ISDIR(mode)) 647 a_size <<= 1; 648 } else if (acl_len != 0 && acl_len != -ENODATA) { 649 mlog_errno(ret); 650 return ret; 651 } 652 } 653 654 if (!(s_size + a_size)) 655 return ret; 656 657 /* 658 * The max space of security xattr taken inline is 659 * 256(name) + 80(value) + 16(entry) = 352 bytes, 660 * The max space of acl xattr taken inline is 661 * 80(value) + 16(entry) * 2(if directory) = 192 bytes, 662 * when blocksize = 512, may reserve one more cluser for 663 * xattr bucket, otherwise reserve one metadata block 664 * for them is ok. 
665 * If this is a new directory with inline data, 666 * we choose to reserve the entire inline area for 667 * directory contents and force an external xattr block. 668 */ 669 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 670 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) || 671 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 672 *want_meta = *want_meta + 1; 673 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 674 } 675 676 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && 677 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { 678 *want_clusters += 1; 679 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); 680 } 681 682 /* 683 * reserve credits and clusters for xattrs which has large value 684 * and have to be set outside 685 */ 686 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) { 687 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb, 688 si->value_len); 689 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 690 new_clusters); 691 *want_clusters += new_clusters; 692 } 693 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && 694 acl_len > OCFS2_XATTR_INLINE_SIZE) { 695 /* for directory, it has DEFAULT and ACCESS two types of acls */ 696 new_clusters = (S_ISDIR(mode) ? 
2 : 1) * 697 ocfs2_clusters_for_bytes(dir->i_sb, acl_len); 698 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb, 699 new_clusters); 700 *want_clusters += new_clusters; 701 } 702 703 return ret; 704 } 705 706 static int ocfs2_xattr_extend_allocation(struct inode *inode, 707 u32 clusters_to_add, 708 struct ocfs2_xattr_value_buf *vb, 709 struct ocfs2_xattr_set_ctxt *ctxt) 710 { 711 int status = 0, credits; 712 handle_t *handle = ctxt->handle; 713 enum ocfs2_alloc_restarted why; 714 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 715 struct ocfs2_extent_tree et; 716 717 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 718 719 while (clusters_to_add) { 720 trace_ocfs2_xattr_extend_allocation(clusters_to_add); 721 722 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 723 OCFS2_JOURNAL_ACCESS_WRITE); 724 if (status < 0) { 725 mlog_errno(status); 726 break; 727 } 728 729 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 730 status = ocfs2_add_clusters_in_btree(handle, 731 &et, 732 &logical_start, 733 clusters_to_add, 734 0, 735 ctxt->data_ac, 736 ctxt->meta_ac, 737 &why); 738 if ((status < 0) && (status != -EAGAIN)) { 739 if (status != -ENOSPC) 740 mlog_errno(status); 741 break; 742 } 743 744 ocfs2_journal_dirty(handle, vb->vb_bh); 745 746 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - 747 prev_clusters; 748 749 if (why != RESTART_NONE && clusters_to_add) { 750 /* 751 * We can only fail in case the alloc file doesn't give 752 * up enough clusters. 
753 */ 754 BUG_ON(why == RESTART_META); 755 756 credits = ocfs2_calc_extend_credits(inode->i_sb, 757 &vb->vb_xv->xr_list); 758 status = ocfs2_extend_trans(handle, credits); 759 if (status < 0) { 760 status = -ENOMEM; 761 mlog_errno(status); 762 break; 763 } 764 } 765 } 766 767 return status; 768 } 769 770 static int __ocfs2_remove_xattr_range(struct inode *inode, 771 struct ocfs2_xattr_value_buf *vb, 772 u32 cpos, u32 phys_cpos, u32 len, 773 unsigned int ext_flags, 774 struct ocfs2_xattr_set_ctxt *ctxt) 775 { 776 int ret; 777 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 778 handle_t *handle = ctxt->handle; 779 struct ocfs2_extent_tree et; 780 781 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 782 783 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 784 OCFS2_JOURNAL_ACCESS_WRITE); 785 if (ret) { 786 mlog_errno(ret); 787 goto out; 788 } 789 790 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, 791 &ctxt->dealloc); 792 if (ret) { 793 mlog_errno(ret); 794 goto out; 795 } 796 797 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 798 ocfs2_journal_dirty(handle, vb->vb_bh); 799 800 if (ext_flags & OCFS2_EXT_REFCOUNTED) 801 ret = ocfs2_decrease_refcount(inode, handle, 802 ocfs2_blocks_to_clusters(inode->i_sb, 803 phys_blkno), 804 len, ctxt->meta_ac, &ctxt->dealloc, 1); 805 else 806 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, 807 phys_blkno, len); 808 if (ret) 809 mlog_errno(ret); 810 811 out: 812 return ret; 813 } 814 815 static int ocfs2_xattr_shrink_size(struct inode *inode, 816 u32 old_clusters, 817 u32 new_clusters, 818 struct ocfs2_xattr_value_buf *vb, 819 struct ocfs2_xattr_set_ctxt *ctxt) 820 { 821 int ret = 0; 822 unsigned int ext_flags; 823 u32 trunc_len, cpos, phys_cpos, alloc_size; 824 u64 block; 825 826 if (old_clusters <= new_clusters) 827 return 0; 828 829 cpos = new_clusters; 830 trunc_len = old_clusters - new_clusters; 831 while (trunc_len) { 832 ret = ocfs2_xattr_get_clusters(inode, cpos, 
&phys_cpos, 833 &alloc_size, 834 &vb->vb_xv->xr_list, &ext_flags); 835 if (ret) { 836 mlog_errno(ret); 837 goto out; 838 } 839 840 if (alloc_size > trunc_len) 841 alloc_size = trunc_len; 842 843 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 844 phys_cpos, alloc_size, 845 ext_flags, ctxt); 846 if (ret) { 847 mlog_errno(ret); 848 goto out; 849 } 850 851 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 852 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), 853 block, alloc_size); 854 cpos += alloc_size; 855 trunc_len -= alloc_size; 856 } 857 858 out: 859 return ret; 860 } 861 862 static int ocfs2_xattr_value_truncate(struct inode *inode, 863 struct ocfs2_xattr_value_buf *vb, 864 int len, 865 struct ocfs2_xattr_set_ctxt *ctxt) 866 { 867 int ret; 868 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 869 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 870 871 if (new_clusters == old_clusters) 872 return 0; 873 874 if (new_clusters > old_clusters) 875 ret = ocfs2_xattr_extend_allocation(inode, 876 new_clusters - old_clusters, 877 vb, ctxt); 878 else 879 ret = ocfs2_xattr_shrink_size(inode, 880 old_clusters, new_clusters, 881 vb, ctxt); 882 883 return ret; 884 } 885 886 static int ocfs2_xattr_list_entry(char *buffer, size_t size, 887 size_t *result, const char *prefix, 888 const char *name, int name_len) 889 { 890 char *p = buffer + *result; 891 int prefix_len = strlen(prefix); 892 int total_len = prefix_len + name_len + 1; 893 894 *result += total_len; 895 896 /* we are just looking for how big our buffer needs to be */ 897 if (!size) 898 return 0; 899 900 if (*result > size) 901 return -ERANGE; 902 903 memcpy(p, prefix, prefix_len); 904 memcpy(p + prefix_len, name, name_len); 905 p[prefix_len + name_len] = '\0'; 906 907 return 0; 908 } 909 910 static int ocfs2_xattr_list_entries(struct inode *inode, 911 struct ocfs2_xattr_header *header, 912 char *buffer, size_t buffer_size) 913 { 914 size_t result = 0; 915 int i, type, ret; 916 
const char *prefix, *name; 917 918 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { 919 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 920 type = ocfs2_xattr_get_type(entry); 921 prefix = ocfs2_xattr_prefix(type); 922 923 if (prefix) { 924 name = (const char *)header + 925 le16_to_cpu(entry->xe_name_offset); 926 927 ret = ocfs2_xattr_list_entry(buffer, buffer_size, 928 &result, prefix, name, 929 entry->xe_name_len); 930 if (ret) 931 return ret; 932 } 933 } 934 935 return result; 936 } 937 938 int ocfs2_has_inline_xattr_value_outside(struct inode *inode, 939 struct ocfs2_dinode *di) 940 { 941 struct ocfs2_xattr_header *xh; 942 int i; 943 944 xh = (struct ocfs2_xattr_header *) 945 ((void *)di + inode->i_sb->s_blocksize - 946 le16_to_cpu(di->i_xattr_inline_size)); 947 948 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) 949 if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) 950 return 1; 951 952 return 0; 953 } 954 955 static int ocfs2_xattr_ibody_list(struct inode *inode, 956 struct ocfs2_dinode *di, 957 char *buffer, 958 size_t buffer_size) 959 { 960 struct ocfs2_xattr_header *header = NULL; 961 struct ocfs2_inode_info *oi = OCFS2_I(inode); 962 int ret = 0; 963 964 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 965 return ret; 966 967 header = (struct ocfs2_xattr_header *) 968 ((void *)di + inode->i_sb->s_blocksize - 969 le16_to_cpu(di->i_xattr_inline_size)); 970 971 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 972 973 return ret; 974 } 975 976 static int ocfs2_xattr_block_list(struct inode *inode, 977 struct ocfs2_dinode *di, 978 char *buffer, 979 size_t buffer_size) 980 { 981 struct buffer_head *blk_bh = NULL; 982 struct ocfs2_xattr_block *xb; 983 int ret = 0; 984 985 if (!di->i_xattr_loc) 986 return ret; 987 988 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 989 &blk_bh); 990 if (ret < 0) { 991 mlog_errno(ret); 992 return ret; 993 } 994 995 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 996 if 
(!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 997 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 998 ret = ocfs2_xattr_list_entries(inode, header, 999 buffer, buffer_size); 1000 } else 1001 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, 1002 buffer, buffer_size); 1003 1004 brelse(blk_bh); 1005 1006 return ret; 1007 } 1008 1009 ssize_t ocfs2_listxattr(struct dentry *dentry, 1010 char *buffer, 1011 size_t size) 1012 { 1013 int ret = 0, i_ret = 0, b_ret = 0; 1014 struct buffer_head *di_bh = NULL; 1015 struct ocfs2_dinode *di = NULL; 1016 struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry)); 1017 1018 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) 1019 return -EOPNOTSUPP; 1020 1021 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1022 return ret; 1023 1024 ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0); 1025 if (ret < 0) { 1026 mlog_errno(ret); 1027 return ret; 1028 } 1029 1030 di = (struct ocfs2_dinode *)di_bh->b_data; 1031 1032 down_read(&oi->ip_xattr_sem); 1033 i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size); 1034 if (i_ret < 0) 1035 b_ret = 0; 1036 else { 1037 if (buffer) { 1038 buffer += i_ret; 1039 size -= i_ret; 1040 } 1041 b_ret = ocfs2_xattr_block_list(d_inode(dentry), di, 1042 buffer, size); 1043 if (b_ret < 0) 1044 i_ret = 0; 1045 } 1046 up_read(&oi->ip_xattr_sem); 1047 ocfs2_inode_unlock(d_inode(dentry), 0); 1048 1049 brelse(di_bh); 1050 1051 return i_ret + b_ret; 1052 } 1053 1054 static int ocfs2_xattr_find_entry(int name_index, 1055 const char *name, 1056 struct ocfs2_xattr_search *xs) 1057 { 1058 struct ocfs2_xattr_entry *entry; 1059 size_t name_len; 1060 int i, cmp = 1; 1061 1062 if (name == NULL) 1063 return -EINVAL; 1064 1065 name_len = strlen(name); 1066 entry = xs->here; 1067 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { 1068 cmp = name_index - ocfs2_xattr_get_type(entry); 1069 if (!cmp) 1070 cmp = name_len - entry->xe_name_len; 1071 if (!cmp) 1072 cmp = memcmp(name, (xs->base + 
1073 le16_to_cpu(entry->xe_name_offset)), 1074 name_len); 1075 if (cmp == 0) 1076 break; 1077 entry += 1; 1078 } 1079 xs->here = entry; 1080 1081 return cmp ? -ENODATA : 0; 1082 } 1083 1084 static int ocfs2_xattr_get_value_outside(struct inode *inode, 1085 struct ocfs2_xattr_value_root *xv, 1086 void *buffer, 1087 size_t len) 1088 { 1089 u32 cpos, p_cluster, num_clusters, bpc, clusters; 1090 u64 blkno; 1091 int i, ret = 0; 1092 size_t cplen, blocksize; 1093 struct buffer_head *bh = NULL; 1094 struct ocfs2_extent_list *el; 1095 1096 el = &xv->xr_list; 1097 clusters = le32_to_cpu(xv->xr_clusters); 1098 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1099 blocksize = inode->i_sb->s_blocksize; 1100 1101 cpos = 0; 1102 while (cpos < clusters) { 1103 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1104 &num_clusters, el, NULL); 1105 if (ret) { 1106 mlog_errno(ret); 1107 goto out; 1108 } 1109 1110 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1111 /* Copy ocfs2_xattr_value */ 1112 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1113 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1114 &bh, NULL); 1115 if (ret) { 1116 mlog_errno(ret); 1117 goto out; 1118 } 1119 1120 cplen = len >= blocksize ? 
blocksize : len; 1121 memcpy(buffer, bh->b_data, cplen); 1122 len -= cplen; 1123 buffer += cplen; 1124 1125 brelse(bh); 1126 bh = NULL; 1127 if (len == 0) 1128 break; 1129 } 1130 cpos += num_clusters; 1131 } 1132 out: 1133 return ret; 1134 } 1135 1136 static int ocfs2_xattr_ibody_get(struct inode *inode, 1137 int name_index, 1138 const char *name, 1139 void *buffer, 1140 size_t buffer_size, 1141 struct ocfs2_xattr_search *xs) 1142 { 1143 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1144 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1145 struct ocfs2_xattr_value_root *xv; 1146 size_t size; 1147 int ret = 0; 1148 1149 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) 1150 return -ENODATA; 1151 1152 xs->end = (void *)di + inode->i_sb->s_blocksize; 1153 xs->header = (struct ocfs2_xattr_header *) 1154 (xs->end - le16_to_cpu(di->i_xattr_inline_size)); 1155 xs->base = (void *)xs->header; 1156 xs->here = xs->header->xh_entries; 1157 1158 ret = ocfs2_xattr_find_entry(name_index, name, xs); 1159 if (ret) 1160 return ret; 1161 size = le64_to_cpu(xs->here->xe_value_size); 1162 if (buffer) { 1163 if (size > buffer_size) 1164 return -ERANGE; 1165 if (ocfs2_xattr_is_local(xs->here)) { 1166 memcpy(buffer, (void *)xs->base + 1167 le16_to_cpu(xs->here->xe_name_offset) + 1168 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); 1169 } else { 1170 xv = (struct ocfs2_xattr_value_root *) 1171 (xs->base + le16_to_cpu( 1172 xs->here->xe_name_offset) + 1173 OCFS2_XATTR_SIZE(xs->here->xe_name_len)); 1174 ret = ocfs2_xattr_get_value_outside(inode, xv, 1175 buffer, size); 1176 if (ret < 0) { 1177 mlog_errno(ret); 1178 return ret; 1179 } 1180 } 1181 } 1182 1183 return size; 1184 } 1185 1186 static int ocfs2_xattr_block_get(struct inode *inode, 1187 int name_index, 1188 const char *name, 1189 void *buffer, 1190 size_t buffer_size, 1191 struct ocfs2_xattr_search *xs) 1192 { 1193 struct ocfs2_xattr_block *xb; 1194 struct ocfs2_xattr_value_root *xv; 1195 size_t size; 1196 
int ret = -ENODATA, name_offset, name_len, i; 1197 int uninitialized_var(block_off); 1198 1199 xs->bucket = ocfs2_xattr_bucket_new(inode); 1200 if (!xs->bucket) { 1201 ret = -ENOMEM; 1202 mlog_errno(ret); 1203 goto cleanup; 1204 } 1205 1206 ret = ocfs2_xattr_block_find(inode, name_index, name, xs); 1207 if (ret) { 1208 mlog_errno(ret); 1209 goto cleanup; 1210 } 1211 1212 if (xs->not_found) { 1213 ret = -ENODATA; 1214 goto cleanup; 1215 } 1216 1217 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 1218 size = le64_to_cpu(xs->here->xe_value_size); 1219 if (buffer) { 1220 ret = -ERANGE; 1221 if (size > buffer_size) 1222 goto cleanup; 1223 1224 name_offset = le16_to_cpu(xs->here->xe_name_offset); 1225 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); 1226 i = xs->here - xs->header->xh_entries; 1227 1228 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1229 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 1230 bucket_xh(xs->bucket), 1231 i, 1232 &block_off, 1233 &name_offset); 1234 if (ret) { 1235 mlog_errno(ret); 1236 goto cleanup; 1237 } 1238 xs->base = bucket_block(xs->bucket, block_off); 1239 } 1240 if (ocfs2_xattr_is_local(xs->here)) { 1241 memcpy(buffer, (void *)xs->base + 1242 name_offset + name_len, size); 1243 } else { 1244 xv = (struct ocfs2_xattr_value_root *) 1245 (xs->base + name_offset + name_len); 1246 ret = ocfs2_xattr_get_value_outside(inode, xv, 1247 buffer, size); 1248 if (ret < 0) { 1249 mlog_errno(ret); 1250 goto cleanup; 1251 } 1252 } 1253 } 1254 ret = size; 1255 cleanup: 1256 ocfs2_xattr_bucket_free(xs->bucket); 1257 1258 brelse(xs->xattr_bh); 1259 xs->xattr_bh = NULL; 1260 return ret; 1261 } 1262 1263 int ocfs2_xattr_get_nolock(struct inode *inode, 1264 struct buffer_head *di_bh, 1265 int name_index, 1266 const char *name, 1267 void *buffer, 1268 size_t buffer_size) 1269 { 1270 int ret; 1271 struct ocfs2_dinode *di = NULL; 1272 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1273 struct ocfs2_xattr_search xis = { 1274 .not_found = 
-ENODATA, 1275 }; 1276 struct ocfs2_xattr_search xbs = { 1277 .not_found = -ENODATA, 1278 }; 1279 1280 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 1281 return -EOPNOTSUPP; 1282 1283 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1284 return -ENODATA; 1285 1286 xis.inode_bh = xbs.inode_bh = di_bh; 1287 di = (struct ocfs2_dinode *)di_bh->b_data; 1288 1289 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 1290 buffer_size, &xis); 1291 if (ret == -ENODATA && di->i_xattr_loc) 1292 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1293 buffer_size, &xbs); 1294 1295 return ret; 1296 } 1297 1298 /* ocfs2_xattr_get() 1299 * 1300 * Copy an extended attribute into the buffer provided. 1301 * Buffer is NULL to compute the size of buffer required. 1302 */ 1303 static int ocfs2_xattr_get(struct inode *inode, 1304 int name_index, 1305 const char *name, 1306 void *buffer, 1307 size_t buffer_size) 1308 { 1309 int ret; 1310 struct buffer_head *di_bh = NULL; 1311 1312 ret = ocfs2_inode_lock(inode, &di_bh, 0); 1313 if (ret < 0) { 1314 mlog_errno(ret); 1315 return ret; 1316 } 1317 down_read(&OCFS2_I(inode)->ip_xattr_sem); 1318 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, 1319 name, buffer, buffer_size); 1320 up_read(&OCFS2_I(inode)->ip_xattr_sem); 1321 1322 ocfs2_inode_unlock(inode, 0); 1323 1324 brelse(di_bh); 1325 1326 return ret; 1327 } 1328 1329 static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1330 handle_t *handle, 1331 struct ocfs2_xattr_value_buf *vb, 1332 const void *value, 1333 int value_len) 1334 { 1335 int ret = 0, i, cp_len; 1336 u16 blocksize = inode->i_sb->s_blocksize; 1337 u32 p_cluster, num_clusters; 1338 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1339 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1340 u64 blkno; 1341 struct buffer_head *bh = NULL; 1342 unsigned int ext_flags; 1343 struct ocfs2_xattr_value_root *xv = vb->vb_xv; 1344 1345 BUG_ON(clusters > 
le32_to_cpu(xv->xr_clusters)); 1346 1347 while (cpos < clusters) { 1348 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1349 &num_clusters, &xv->xr_list, 1350 &ext_flags); 1351 if (ret) { 1352 mlog_errno(ret); 1353 goto out; 1354 } 1355 1356 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 1357 1358 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1359 1360 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1361 ret = ocfs2_read_block(INODE_CACHE(inode), blkno, 1362 &bh, NULL); 1363 if (ret) { 1364 mlog_errno(ret); 1365 goto out; 1366 } 1367 1368 ret = ocfs2_journal_access(handle, 1369 INODE_CACHE(inode), 1370 bh, 1371 OCFS2_JOURNAL_ACCESS_WRITE); 1372 if (ret < 0) { 1373 mlog_errno(ret); 1374 goto out; 1375 } 1376 1377 cp_len = value_len > blocksize ? blocksize : value_len; 1378 memcpy(bh->b_data, value, cp_len); 1379 value_len -= cp_len; 1380 value += cp_len; 1381 if (cp_len < blocksize) 1382 memset(bh->b_data + cp_len, 0, 1383 blocksize - cp_len); 1384 1385 ocfs2_journal_dirty(handle, bh); 1386 brelse(bh); 1387 bh = NULL; 1388 1389 /* 1390 * XXX: do we need to empty all the following 1391 * blocks in this cluster? 
1392 */ 1393 if (!value_len) 1394 break; 1395 } 1396 cpos += num_clusters; 1397 } 1398 out: 1399 brelse(bh); 1400 1401 return ret; 1402 } 1403 1404 static int ocfs2_xa_check_space_helper(int needed_space, int free_start, 1405 int num_entries) 1406 { 1407 int free_space; 1408 1409 if (!needed_space) 1410 return 0; 1411 1412 free_space = free_start - 1413 sizeof(struct ocfs2_xattr_header) - 1414 (num_entries * sizeof(struct ocfs2_xattr_entry)) - 1415 OCFS2_XATTR_HEADER_GAP; 1416 if (free_space < 0) 1417 return -EIO; 1418 if (free_space < needed_space) 1419 return -ENOSPC; 1420 1421 return 0; 1422 } 1423 1424 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, 1425 int type) 1426 { 1427 return loc->xl_ops->xlo_journal_access(handle, loc, type); 1428 } 1429 1430 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) 1431 { 1432 loc->xl_ops->xlo_journal_dirty(handle, loc); 1433 } 1434 1435 /* Give a pointer into the storage for the given offset */ 1436 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) 1437 { 1438 BUG_ON(offset >= loc->xl_size); 1439 return loc->xl_ops->xlo_offset_pointer(loc, offset); 1440 } 1441 1442 /* 1443 * Wipe the name+value pair and allow the storage to reclaim it. This 1444 * must be followed by either removal of the entry or a call to 1445 * ocfs2_xa_add_namevalue(). 1446 */ 1447 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) 1448 { 1449 loc->xl_ops->xlo_wipe_namevalue(loc); 1450 } 1451 1452 /* 1453 * Find lowest offset to a name+value pair. This is the start of our 1454 * downward-growing free space. 1455 */ 1456 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) 1457 { 1458 return loc->xl_ops->xlo_get_free_start(loc); 1459 } 1460 1461 /* Can we reuse loc->xl_entry for xi? 
*/ 1462 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, 1463 struct ocfs2_xattr_info *xi) 1464 { 1465 return loc->xl_ops->xlo_can_reuse(loc, xi); 1466 } 1467 1468 /* How much free space is needed to set the new value */ 1469 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, 1470 struct ocfs2_xattr_info *xi) 1471 { 1472 return loc->xl_ops->xlo_check_space(loc, xi); 1473 } 1474 1475 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1476 { 1477 loc->xl_ops->xlo_add_entry(loc, name_hash); 1478 loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); 1479 /* 1480 * We can't leave the new entry's xe_name_offset at zero or 1481 * add_namevalue() will go nuts. We set it to the size of our 1482 * storage so that it can never be less than any other entry. 1483 */ 1484 loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); 1485 } 1486 1487 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, 1488 struct ocfs2_xattr_info *xi) 1489 { 1490 int size = namevalue_size_xi(xi); 1491 int nameval_offset; 1492 char *nameval_buf; 1493 1494 loc->xl_ops->xlo_add_namevalue(loc, size); 1495 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 1496 loc->xl_entry->xe_name_len = xi->xi_name_len; 1497 ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); 1498 ocfs2_xattr_set_local(loc->xl_entry, 1499 xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); 1500 1501 nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1502 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 1503 memset(nameval_buf, 0, size); 1504 memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); 1505 } 1506 1507 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, 1508 struct ocfs2_xattr_value_buf *vb) 1509 { 1510 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1511 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 1512 1513 /* Value bufs are for value trees */ 1514 BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); 1515 
BUG_ON(namevalue_size_xe(loc->xl_entry) != 1516 (name_size + OCFS2_XATTR_ROOT_SIZE)); 1517 1518 loc->xl_ops->xlo_fill_value_buf(loc, vb); 1519 vb->vb_xv = 1520 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, 1521 nameval_offset + 1522 name_size); 1523 } 1524 1525 static int ocfs2_xa_block_journal_access(handle_t *handle, 1526 struct ocfs2_xa_loc *loc, int type) 1527 { 1528 struct buffer_head *bh = loc->xl_storage; 1529 ocfs2_journal_access_func access; 1530 1531 if (loc->xl_size == (bh->b_size - 1532 offsetof(struct ocfs2_xattr_block, 1533 xb_attrs.xb_header))) 1534 access = ocfs2_journal_access_xb; 1535 else 1536 access = ocfs2_journal_access_di; 1537 return access(handle, INODE_CACHE(loc->xl_inode), bh, type); 1538 } 1539 1540 static void ocfs2_xa_block_journal_dirty(handle_t *handle, 1541 struct ocfs2_xa_loc *loc) 1542 { 1543 struct buffer_head *bh = loc->xl_storage; 1544 1545 ocfs2_journal_dirty(handle, bh); 1546 } 1547 1548 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, 1549 int offset) 1550 { 1551 return (char *)loc->xl_header + offset; 1552 } 1553 1554 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, 1555 struct ocfs2_xattr_info *xi) 1556 { 1557 /* 1558 * Block storage is strict. If the sizes aren't exact, we will 1559 * remove the old one and reinsert the new. 
1560 */ 1561 return namevalue_size_xe(loc->xl_entry) == 1562 namevalue_size_xi(xi); 1563 } 1564 1565 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) 1566 { 1567 struct ocfs2_xattr_header *xh = loc->xl_header; 1568 int i, count = le16_to_cpu(xh->xh_count); 1569 int offset, free_start = loc->xl_size; 1570 1571 for (i = 0; i < count; i++) { 1572 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1573 if (offset < free_start) 1574 free_start = offset; 1575 } 1576 1577 return free_start; 1578 } 1579 1580 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, 1581 struct ocfs2_xattr_info *xi) 1582 { 1583 int count = le16_to_cpu(loc->xl_header->xh_count); 1584 int free_start = ocfs2_xa_get_free_start(loc); 1585 int needed_space = ocfs2_xi_entry_usage(xi); 1586 1587 /* 1588 * Block storage will reclaim the original entry before inserting 1589 * the new value, so we only need the difference. If the new 1590 * entry is smaller than the old one, we don't need anything. 1591 */ 1592 if (loc->xl_entry) { 1593 /* Don't need space if we're reusing! */ 1594 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1595 needed_space = 0; 1596 else 1597 needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); 1598 } 1599 if (needed_space < 0) 1600 needed_space = 0; 1601 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1602 } 1603 1604 /* 1605 * Block storage for xattrs keeps the name+value pairs compacted. When 1606 * we remove one, we have to shift any that preceded it towards the end. 
1607 */ 1608 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) 1609 { 1610 int i, offset; 1611 int namevalue_offset, first_namevalue_offset, namevalue_size; 1612 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1613 struct ocfs2_xattr_header *xh = loc->xl_header; 1614 int count = le16_to_cpu(xh->xh_count); 1615 1616 namevalue_offset = le16_to_cpu(entry->xe_name_offset); 1617 namevalue_size = namevalue_size_xe(entry); 1618 first_namevalue_offset = ocfs2_xa_get_free_start(loc); 1619 1620 /* Shift the name+value pairs */ 1621 memmove((char *)xh + first_namevalue_offset + namevalue_size, 1622 (char *)xh + first_namevalue_offset, 1623 namevalue_offset - first_namevalue_offset); 1624 memset((char *)xh + first_namevalue_offset, 0, namevalue_size); 1625 1626 /* Now tell xh->xh_entries about it */ 1627 for (i = 0; i < count; i++) { 1628 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1629 if (offset <= namevalue_offset) 1630 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1631 namevalue_size); 1632 } 1633 1634 /* 1635 * Note that we don't update xh_free_start or xh_name_value_len 1636 * because they're not used in block-stored xattrs. 
1637 */ 1638 } 1639 1640 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1641 { 1642 int count = le16_to_cpu(loc->xl_header->xh_count); 1643 loc->xl_entry = &(loc->xl_header->xh_entries[count]); 1644 le16_add_cpu(&loc->xl_header->xh_count, 1); 1645 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1646 } 1647 1648 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1649 { 1650 int free_start = ocfs2_xa_get_free_start(loc); 1651 1652 loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); 1653 } 1654 1655 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, 1656 struct ocfs2_xattr_value_buf *vb) 1657 { 1658 struct buffer_head *bh = loc->xl_storage; 1659 1660 if (loc->xl_size == (bh->b_size - 1661 offsetof(struct ocfs2_xattr_block, 1662 xb_attrs.xb_header))) 1663 vb->vb_access = ocfs2_journal_access_xb; 1664 else 1665 vb->vb_access = ocfs2_journal_access_di; 1666 vb->vb_bh = bh; 1667 } 1668 1669 /* 1670 * Operations for xattrs stored in blocks. This includes inline inode 1671 * storage and unindexed ocfs2_xattr_blocks. 
1672 */ 1673 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { 1674 .xlo_journal_access = ocfs2_xa_block_journal_access, 1675 .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, 1676 .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, 1677 .xlo_check_space = ocfs2_xa_block_check_space, 1678 .xlo_can_reuse = ocfs2_xa_block_can_reuse, 1679 .xlo_get_free_start = ocfs2_xa_block_get_free_start, 1680 .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, 1681 .xlo_add_entry = ocfs2_xa_block_add_entry, 1682 .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, 1683 .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, 1684 }; 1685 1686 static int ocfs2_xa_bucket_journal_access(handle_t *handle, 1687 struct ocfs2_xa_loc *loc, int type) 1688 { 1689 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1690 1691 return ocfs2_xattr_bucket_journal_access(handle, bucket, type); 1692 } 1693 1694 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, 1695 struct ocfs2_xa_loc *loc) 1696 { 1697 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1698 1699 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 1700 } 1701 1702 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, 1703 int offset) 1704 { 1705 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1706 int block, block_offset; 1707 1708 /* The header is at the front of the bucket */ 1709 block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; 1710 block_offset = offset % loc->xl_inode->i_sb->s_blocksize; 1711 1712 return bucket_block(bucket, block) + block_offset; 1713 } 1714 1715 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, 1716 struct ocfs2_xattr_info *xi) 1717 { 1718 return namevalue_size_xe(loc->xl_entry) >= 1719 namevalue_size_xi(xi); 1720 } 1721 1722 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) 1723 { 1724 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1725 return le16_to_cpu(bucket_xh(bucket)->xh_free_start); 1726 } 1727 1728 static 
int ocfs2_bucket_align_free_start(struct super_block *sb, 1729 int free_start, int size) 1730 { 1731 /* 1732 * We need to make sure that the name+value pair fits within 1733 * one block. 1734 */ 1735 if (((free_start - size) >> sb->s_blocksize_bits) != 1736 ((free_start - 1) >> sb->s_blocksize_bits)) 1737 free_start -= free_start % sb->s_blocksize; 1738 1739 return free_start; 1740 } 1741 1742 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, 1743 struct ocfs2_xattr_info *xi) 1744 { 1745 int rc; 1746 int count = le16_to_cpu(loc->xl_header->xh_count); 1747 int free_start = ocfs2_xa_get_free_start(loc); 1748 int needed_space = ocfs2_xi_entry_usage(xi); 1749 int size = namevalue_size_xi(xi); 1750 struct super_block *sb = loc->xl_inode->i_sb; 1751 1752 /* 1753 * Bucket storage does not reclaim name+value pairs it cannot 1754 * reuse. They live as holes until the bucket fills, and then 1755 * the bucket is defragmented. However, the bucket can reclaim 1756 * the ocfs2_xattr_entry. 1757 */ 1758 if (loc->xl_entry) { 1759 /* Don't need space if we're reusing! */ 1760 if (ocfs2_xa_can_reuse_entry(loc, xi)) 1761 needed_space = 0; 1762 else 1763 needed_space -= sizeof(struct ocfs2_xattr_entry); 1764 } 1765 BUG_ON(needed_space < 0); 1766 1767 if (free_start < size) { 1768 if (needed_space) 1769 return -ENOSPC; 1770 } else { 1771 /* 1772 * First we check if it would fit in the first place. 1773 * Below, we align the free start to a block. This may 1774 * slide us below the minimum gap. By checking unaligned 1775 * first, we avoid that error. 
1776 */ 1777 rc = ocfs2_xa_check_space_helper(needed_space, free_start, 1778 count); 1779 if (rc) 1780 return rc; 1781 free_start = ocfs2_bucket_align_free_start(sb, free_start, 1782 size); 1783 } 1784 return ocfs2_xa_check_space_helper(needed_space, free_start, count); 1785 } 1786 1787 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) 1788 { 1789 le16_add_cpu(&loc->xl_header->xh_name_value_len, 1790 -namevalue_size_xe(loc->xl_entry)); 1791 } 1792 1793 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) 1794 { 1795 struct ocfs2_xattr_header *xh = loc->xl_header; 1796 int count = le16_to_cpu(xh->xh_count); 1797 int low = 0, high = count - 1, tmp; 1798 struct ocfs2_xattr_entry *tmp_xe; 1799 1800 /* 1801 * We keep buckets sorted by name_hash, so we need to find 1802 * our insert place. 1803 */ 1804 while (low <= high && count) { 1805 tmp = (low + high) / 2; 1806 tmp_xe = &xh->xh_entries[tmp]; 1807 1808 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) 1809 low = tmp + 1; 1810 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) 1811 high = tmp - 1; 1812 else { 1813 low = tmp; 1814 break; 1815 } 1816 } 1817 1818 if (low != count) 1819 memmove(&xh->xh_entries[low + 1], 1820 &xh->xh_entries[low], 1821 ((count - low) * sizeof(struct ocfs2_xattr_entry))); 1822 1823 le16_add_cpu(&xh->xh_count, 1); 1824 loc->xl_entry = &xh->xh_entries[low]; 1825 memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); 1826 } 1827 1828 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) 1829 { 1830 int free_start = ocfs2_xa_get_free_start(loc); 1831 struct ocfs2_xattr_header *xh = loc->xl_header; 1832 struct super_block *sb = loc->xl_inode->i_sb; 1833 int nameval_offset; 1834 1835 free_start = ocfs2_bucket_align_free_start(sb, free_start, size); 1836 nameval_offset = free_start - size; 1837 loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); 1838 xh->xh_free_start = cpu_to_le16(nameval_offset); 1839 
le16_add_cpu(&xh->xh_name_value_len, size); 1840 1841 } 1842 1843 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, 1844 struct ocfs2_xattr_value_buf *vb) 1845 { 1846 struct ocfs2_xattr_bucket *bucket = loc->xl_storage; 1847 struct super_block *sb = loc->xl_inode->i_sb; 1848 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 1849 int size = namevalue_size_xe(loc->xl_entry); 1850 int block_offset = nameval_offset >> sb->s_blocksize_bits; 1851 1852 /* Values are not allowed to straddle block boundaries */ 1853 BUG_ON(block_offset != 1854 ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); 1855 /* We expect the bucket to be filled in */ 1856 BUG_ON(!bucket->bu_bhs[block_offset]); 1857 1858 vb->vb_access = ocfs2_journal_access; 1859 vb->vb_bh = bucket->bu_bhs[block_offset]; 1860 } 1861 1862 /* Operations for xattrs stored in buckets. */ 1863 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { 1864 .xlo_journal_access = ocfs2_xa_bucket_journal_access, 1865 .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, 1866 .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, 1867 .xlo_check_space = ocfs2_xa_bucket_check_space, 1868 .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, 1869 .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, 1870 .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, 1871 .xlo_add_entry = ocfs2_xa_bucket_add_entry, 1872 .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, 1873 .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, 1874 }; 1875 1876 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) 1877 { 1878 struct ocfs2_xattr_value_buf vb; 1879 1880 if (ocfs2_xattr_is_local(loc->xl_entry)) 1881 return 0; 1882 1883 ocfs2_xa_fill_value_buf(loc, &vb); 1884 return le32_to_cpu(vb.vb_xv->xr_clusters); 1885 } 1886 1887 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, 1888 struct ocfs2_xattr_set_ctxt *ctxt) 1889 { 1890 int trunc_rc, access_rc; 1891 struct 
ocfs2_xattr_value_buf vb; 1892 1893 ocfs2_xa_fill_value_buf(loc, &vb); 1894 trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, 1895 ctxt); 1896 1897 /* 1898 * The caller of ocfs2_xa_value_truncate() has already called 1899 * ocfs2_xa_journal_access on the loc. However, The truncate code 1900 * calls ocfs2_extend_trans(). This may commit the previous 1901 * transaction and open a new one. If this is a bucket, truncate 1902 * could leave only vb->vb_bh set up for journaling. Meanwhile, 1903 * the caller is expecting to dirty the entire bucket. So we must 1904 * reset the journal work. We do this even if truncate has failed, 1905 * as it could have failed after committing the extend. 1906 */ 1907 access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, 1908 OCFS2_JOURNAL_ACCESS_WRITE); 1909 1910 /* Errors in truncate take precedence */ 1911 return trunc_rc ? trunc_rc : access_rc; 1912 } 1913 1914 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) 1915 { 1916 int index, count; 1917 struct ocfs2_xattr_header *xh = loc->xl_header; 1918 struct ocfs2_xattr_entry *entry = loc->xl_entry; 1919 1920 ocfs2_xa_wipe_namevalue(loc); 1921 loc->xl_entry = NULL; 1922 1923 le16_add_cpu(&xh->xh_count, -1); 1924 count = le16_to_cpu(xh->xh_count); 1925 1926 /* 1927 * Only zero out the entry if there are more remaining. This is 1928 * important for an empty bucket, as it keeps track of the 1929 * bucket's hash value. It doesn't hurt empty block storage. 
1930 */ 1931 if (count) { 1932 index = ((char *)entry - (char *)&xh->xh_entries) / 1933 sizeof(struct ocfs2_xattr_entry); 1934 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], 1935 (count - index) * sizeof(struct ocfs2_xattr_entry)); 1936 memset(&xh->xh_entries[count], 0, 1937 sizeof(struct ocfs2_xattr_entry)); 1938 } 1939 } 1940 1941 /* 1942 * If we have a problem adjusting the size of an external value during 1943 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr 1944 * in an intermediate state. For example, the value may be partially 1945 * truncated. 1946 * 1947 * If the value tree hasn't changed, the extend/truncate went nowhere. 1948 * We have nothing to do. The caller can treat it as a straight error. 1949 * 1950 * If the value tree got partially truncated, we now have a corrupted 1951 * extended attribute. We're going to wipe its entry and leak the 1952 * clusters. Better to leak some storage than leave a corrupt entry. 1953 * 1954 * If the value tree grew, it obviously didn't grow enough for the 1955 * new entry. We're not going to try and reclaim those clusters either. 1956 * If there was already an external value there (orig_clusters != 0), 1957 * the new clusters are attached safely and we can just leave the old 1958 * value in place. If there was no external value there, we remove 1959 * the entry. 1960 * 1961 * This way, the xattr block we store in the journal will be consistent. 1962 * If the size change broke because of the journal, no changes will hit 1963 * disk anyway. 1964 */ 1965 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, 1966 const char *what, 1967 unsigned int orig_clusters) 1968 { 1969 unsigned int new_clusters = ocfs2_xa_value_clusters(loc); 1970 char *nameval_buf = ocfs2_xa_offset_pointer(loc, 1971 le16_to_cpu(loc->xl_entry->xe_name_offset)); 1972 1973 if (new_clusters < orig_clusters) { 1974 mlog(ML_ERROR, 1975 "Partial truncate while %s xattr %.*s. 
Leaking " 1976 "%u clusters and removing the entry\n", 1977 what, loc->xl_entry->xe_name_len, nameval_buf, 1978 orig_clusters - new_clusters); 1979 ocfs2_xa_remove_entry(loc); 1980 } else if (!orig_clusters) { 1981 mlog(ML_ERROR, 1982 "Unable to allocate an external value for xattr " 1983 "%.*s safely. Leaking %u clusters and removing the " 1984 "entry\n", 1985 loc->xl_entry->xe_name_len, nameval_buf, 1986 new_clusters - orig_clusters); 1987 ocfs2_xa_remove_entry(loc); 1988 } else if (new_clusters > orig_clusters) 1989 mlog(ML_ERROR, 1990 "Unable to grow xattr %.*s safely. %u new clusters " 1991 "have been added, but the value will not be " 1992 "modified\n", 1993 loc->xl_entry->xe_name_len, nameval_buf, 1994 new_clusters - orig_clusters); 1995 } 1996 1997 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, 1998 struct ocfs2_xattr_set_ctxt *ctxt) 1999 { 2000 int rc = 0; 2001 unsigned int orig_clusters; 2002 2003 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2004 orig_clusters = ocfs2_xa_value_clusters(loc); 2005 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2006 if (rc) { 2007 mlog_errno(rc); 2008 /* 2009 * Since this is remove, we can return 0 if 2010 * ocfs2_xa_cleanup_value_truncate() is going to 2011 * wipe the entry anyway. So we check the 2012 * cluster count as well. 2013 */ 2014 if (orig_clusters != ocfs2_xa_value_clusters(loc)) 2015 rc = 0; 2016 ocfs2_xa_cleanup_value_truncate(loc, "removing", 2017 orig_clusters); 2018 if (rc) 2019 goto out; 2020 } 2021 } 2022 2023 ocfs2_xa_remove_entry(loc); 2024 2025 out: 2026 return rc; 2027 } 2028 2029 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) 2030 { 2031 int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); 2032 char *nameval_buf; 2033 2034 nameval_buf = ocfs2_xa_offset_pointer(loc, 2035 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2036 memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); 2037 } 2038 2039 /* 2040 * Take an existing entry and make it ready for the new value. 
This 2041 * won't allocate space, but it may free space. It should be ready for 2042 * ocfs2_xa_prepare_entry() to finish the work. 2043 */ 2044 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc, 2045 struct ocfs2_xattr_info *xi, 2046 struct ocfs2_xattr_set_ctxt *ctxt) 2047 { 2048 int rc = 0; 2049 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2050 unsigned int orig_clusters; 2051 char *nameval_buf; 2052 int xe_local = ocfs2_xattr_is_local(loc->xl_entry); 2053 int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE; 2054 2055 BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) != 2056 name_size); 2057 2058 nameval_buf = ocfs2_xa_offset_pointer(loc, 2059 le16_to_cpu(loc->xl_entry->xe_name_offset)); 2060 if (xe_local) { 2061 memset(nameval_buf + name_size, 0, 2062 namevalue_size_xe(loc->xl_entry) - name_size); 2063 if (!xi_local) 2064 ocfs2_xa_install_value_root(loc); 2065 } else { 2066 orig_clusters = ocfs2_xa_value_clusters(loc); 2067 if (xi_local) { 2068 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2069 if (rc < 0) 2070 mlog_errno(rc); 2071 else 2072 memset(nameval_buf + name_size, 0, 2073 namevalue_size_xe(loc->xl_entry) - 2074 name_size); 2075 } else if (le64_to_cpu(loc->xl_entry->xe_value_size) > 2076 xi->xi_value_len) { 2077 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, 2078 ctxt); 2079 if (rc < 0) 2080 mlog_errno(rc); 2081 } 2082 2083 if (rc) { 2084 ocfs2_xa_cleanup_value_truncate(loc, "reusing", 2085 orig_clusters); 2086 goto out; 2087 } 2088 } 2089 2090 loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); 2091 ocfs2_xattr_set_local(loc->xl_entry, xi_local); 2092 2093 out: 2094 return rc; 2095 } 2096 2097 /* 2098 * Prepares loc->xl_entry to receive the new xattr. This includes 2099 * properly setting up the name+value pair region. If loc->xl_entry 2100 * already exists, it will take care of modifying it appropriately. 2101 * 2102 * Note that this modifies the data. You did journal_access already, 2103 * right? 
2104 */ 2105 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, 2106 struct ocfs2_xattr_info *xi, 2107 u32 name_hash, 2108 struct ocfs2_xattr_set_ctxt *ctxt) 2109 { 2110 int rc = 0; 2111 unsigned int orig_clusters; 2112 __le64 orig_value_size = 0; 2113 2114 rc = ocfs2_xa_check_space(loc, xi); 2115 if (rc) 2116 goto out; 2117 2118 if (loc->xl_entry) { 2119 if (ocfs2_xa_can_reuse_entry(loc, xi)) { 2120 orig_value_size = loc->xl_entry->xe_value_size; 2121 rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); 2122 if (rc) 2123 goto out; 2124 goto alloc_value; 2125 } 2126 2127 if (!ocfs2_xattr_is_local(loc->xl_entry)) { 2128 orig_clusters = ocfs2_xa_value_clusters(loc); 2129 rc = ocfs2_xa_value_truncate(loc, 0, ctxt); 2130 if (rc) { 2131 mlog_errno(rc); 2132 ocfs2_xa_cleanup_value_truncate(loc, 2133 "overwriting", 2134 orig_clusters); 2135 goto out; 2136 } 2137 } 2138 ocfs2_xa_wipe_namevalue(loc); 2139 } else 2140 ocfs2_xa_add_entry(loc, name_hash); 2141 2142 /* 2143 * If we get here, we have a blank entry. Fill it. We grow our 2144 * name+value pair back from the end. 2145 */ 2146 ocfs2_xa_add_namevalue(loc, xi); 2147 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) 2148 ocfs2_xa_install_value_root(loc); 2149 2150 alloc_value: 2151 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2152 orig_clusters = ocfs2_xa_value_clusters(loc); 2153 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); 2154 if (rc < 0) { 2155 ctxt->set_abort = 1; 2156 ocfs2_xa_cleanup_value_truncate(loc, "growing", 2157 orig_clusters); 2158 /* 2159 * If we were growing an existing value, 2160 * ocfs2_xa_cleanup_value_truncate() won't remove 2161 * the entry. We need to restore the original value 2162 * size. 2163 */ 2164 if (loc->xl_entry) { 2165 BUG_ON(!orig_value_size); 2166 loc->xl_entry->xe_value_size = orig_value_size; 2167 } 2168 mlog_errno(rc); 2169 } 2170 } 2171 2172 out: 2173 return rc; 2174 } 2175 2176 /* 2177 * Store the value portion of the name+value pair. 
This will skip 2178 * values that are stored externally. Their tree roots were set up 2179 * by ocfs2_xa_prepare_entry(). 2180 */ 2181 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, 2182 struct ocfs2_xattr_info *xi, 2183 struct ocfs2_xattr_set_ctxt *ctxt) 2184 { 2185 int rc = 0; 2186 int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); 2187 int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); 2188 char *nameval_buf; 2189 struct ocfs2_xattr_value_buf vb; 2190 2191 nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); 2192 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { 2193 ocfs2_xa_fill_value_buf(loc, &vb); 2194 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, 2195 ctxt->handle, &vb, 2196 xi->xi_value, 2197 xi->xi_value_len); 2198 } else 2199 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); 2200 2201 return rc; 2202 } 2203 2204 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, 2205 struct ocfs2_xattr_info *xi, 2206 struct ocfs2_xattr_set_ctxt *ctxt) 2207 { 2208 int ret; 2209 u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, 2210 xi->xi_name_len); 2211 2212 ret = ocfs2_xa_journal_access(ctxt->handle, loc, 2213 OCFS2_JOURNAL_ACCESS_WRITE); 2214 if (ret) { 2215 mlog_errno(ret); 2216 goto out; 2217 } 2218 2219 /* 2220 * From here on out, everything is going to modify the buffer a 2221 * little. Errors are going to leave the xattr header in a 2222 * sane state. Thus, even with errors we dirty the sucker. 
2223 */ 2224 2225 /* Don't worry, we are never called with !xi_value and !xl_entry */ 2226 if (!xi->xi_value) { 2227 ret = ocfs2_xa_remove(loc, ctxt); 2228 goto out_dirty; 2229 } 2230 2231 ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); 2232 if (ret) { 2233 if (ret != -ENOSPC) 2234 mlog_errno(ret); 2235 goto out_dirty; 2236 } 2237 2238 ret = ocfs2_xa_store_value(loc, xi, ctxt); 2239 if (ret) 2240 mlog_errno(ret); 2241 2242 out_dirty: 2243 ocfs2_xa_journal_dirty(ctxt->handle, loc); 2244 2245 out: 2246 return ret; 2247 } 2248 2249 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, 2250 struct inode *inode, 2251 struct buffer_head *bh, 2252 struct ocfs2_xattr_entry *entry) 2253 { 2254 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2255 2256 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); 2257 2258 loc->xl_inode = inode; 2259 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2260 loc->xl_storage = bh; 2261 loc->xl_entry = entry; 2262 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); 2263 loc->xl_header = 2264 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - 2265 loc->xl_size); 2266 } 2267 2268 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, 2269 struct inode *inode, 2270 struct buffer_head *bh, 2271 struct ocfs2_xattr_entry *entry) 2272 { 2273 struct ocfs2_xattr_block *xb = 2274 (struct ocfs2_xattr_block *)bh->b_data; 2275 2276 BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); 2277 2278 loc->xl_inode = inode; 2279 loc->xl_ops = &ocfs2_xa_block_loc_ops; 2280 loc->xl_storage = bh; 2281 loc->xl_header = &(xb->xb_attrs.xb_header); 2282 loc->xl_entry = entry; 2283 loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, 2284 xb_attrs.xb_header); 2285 } 2286 2287 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, 2288 struct ocfs2_xattr_bucket *bucket, 2289 struct ocfs2_xattr_entry *entry) 2290 { 2291 loc->xl_inode = bucket->bu_inode; 2292 loc->xl_ops = 
&ocfs2_xa_bucket_loc_ops; 2293 loc->xl_storage = bucket; 2294 loc->xl_header = bucket_xh(bucket); 2295 loc->xl_entry = entry; 2296 loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; 2297 } 2298 2299 /* 2300 * In xattr remove, if it is stored outside and refcounted, we may have 2301 * the chance to split the refcount tree. So need the allocators. 2302 */ 2303 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, 2304 struct ocfs2_xattr_value_root *xv, 2305 struct ocfs2_caching_info *ref_ci, 2306 struct buffer_head *ref_root_bh, 2307 struct ocfs2_alloc_context **meta_ac, 2308 int *ref_credits) 2309 { 2310 int ret, meta_add = 0; 2311 u32 p_cluster, num_clusters; 2312 unsigned int ext_flags; 2313 2314 *ref_credits = 0; 2315 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 2316 &num_clusters, 2317 &xv->xr_list, 2318 &ext_flags); 2319 if (ret) { 2320 mlog_errno(ret); 2321 goto out; 2322 } 2323 2324 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 2325 goto out; 2326 2327 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, 2328 ref_root_bh, xv, 2329 &meta_add, ref_credits); 2330 if (ret) { 2331 mlog_errno(ret); 2332 goto out; 2333 } 2334 2335 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2336 meta_add, meta_ac); 2337 if (ret) 2338 mlog_errno(ret); 2339 2340 out: 2341 return ret; 2342 } 2343 2344 static int ocfs2_remove_value_outside(struct inode*inode, 2345 struct ocfs2_xattr_value_buf *vb, 2346 struct ocfs2_xattr_header *header, 2347 struct ocfs2_caching_info *ref_ci, 2348 struct buffer_head *ref_root_bh) 2349 { 2350 int ret = 0, i, ref_credits; 2351 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2352 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2353 void *val; 2354 2355 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 2356 2357 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 2358 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 2359 2360 if (ocfs2_xattr_is_local(entry)) 2361 continue; 2362 2363 val = (void *)header + 2364 
le16_to_cpu(entry->xe_name_offset); 2365 vb->vb_xv = (struct ocfs2_xattr_value_root *) 2366 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 2367 2368 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, 2369 ref_ci, ref_root_bh, 2370 &ctxt.meta_ac, 2371 &ref_credits); 2372 2373 ctxt.handle = ocfs2_start_trans(osb, ref_credits + 2374 ocfs2_remove_extent_credits(osb->sb)); 2375 if (IS_ERR(ctxt.handle)) { 2376 ret = PTR_ERR(ctxt.handle); 2377 mlog_errno(ret); 2378 break; 2379 } 2380 2381 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 2382 2383 ocfs2_commit_trans(osb, ctxt.handle); 2384 if (ctxt.meta_ac) { 2385 ocfs2_free_alloc_context(ctxt.meta_ac); 2386 ctxt.meta_ac = NULL; 2387 } 2388 2389 if (ret < 0) { 2390 mlog_errno(ret); 2391 break; 2392 } 2393 2394 } 2395 2396 if (ctxt.meta_ac) 2397 ocfs2_free_alloc_context(ctxt.meta_ac); 2398 ocfs2_schedule_truncate_log_flush(osb, 1); 2399 ocfs2_run_deallocs(osb, &ctxt.dealloc); 2400 return ret; 2401 } 2402 2403 static int ocfs2_xattr_ibody_remove(struct inode *inode, 2404 struct buffer_head *di_bh, 2405 struct ocfs2_caching_info *ref_ci, 2406 struct buffer_head *ref_root_bh) 2407 { 2408 2409 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2410 struct ocfs2_xattr_header *header; 2411 int ret; 2412 struct ocfs2_xattr_value_buf vb = { 2413 .vb_bh = di_bh, 2414 .vb_access = ocfs2_journal_access_di, 2415 }; 2416 2417 header = (struct ocfs2_xattr_header *) 2418 ((void *)di + inode->i_sb->s_blocksize - 2419 le16_to_cpu(di->i_xattr_inline_size)); 2420 2421 ret = ocfs2_remove_value_outside(inode, &vb, header, 2422 ref_ci, ref_root_bh); 2423 2424 return ret; 2425 } 2426 2427 struct ocfs2_rm_xattr_bucket_para { 2428 struct ocfs2_caching_info *ref_ci; 2429 struct buffer_head *ref_root_bh; 2430 }; 2431 2432 static int ocfs2_xattr_block_remove(struct inode *inode, 2433 struct buffer_head *blk_bh, 2434 struct ocfs2_caching_info *ref_ci, 2435 struct buffer_head *ref_root_bh) 2436 { 2437 struct ocfs2_xattr_block 
*xb; 2438 int ret = 0; 2439 struct ocfs2_xattr_value_buf vb = { 2440 .vb_bh = blk_bh, 2441 .vb_access = ocfs2_journal_access_xb, 2442 }; 2443 struct ocfs2_rm_xattr_bucket_para args = { 2444 .ref_ci = ref_ci, 2445 .ref_root_bh = ref_root_bh, 2446 }; 2447 2448 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2449 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2450 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 2451 ret = ocfs2_remove_value_outside(inode, &vb, header, 2452 ref_ci, ref_root_bh); 2453 } else 2454 ret = ocfs2_iterate_xattr_index_block(inode, 2455 blk_bh, 2456 ocfs2_rm_xattr_cluster, 2457 &args); 2458 2459 return ret; 2460 } 2461 2462 static int ocfs2_xattr_free_block(struct inode *inode, 2463 u64 block, 2464 struct ocfs2_caching_info *ref_ci, 2465 struct buffer_head *ref_root_bh) 2466 { 2467 struct inode *xb_alloc_inode; 2468 struct buffer_head *xb_alloc_bh = NULL; 2469 struct buffer_head *blk_bh = NULL; 2470 struct ocfs2_xattr_block *xb; 2471 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2472 handle_t *handle; 2473 int ret = 0; 2474 u64 blk, bg_blkno; 2475 u16 bit; 2476 2477 ret = ocfs2_read_xattr_block(inode, block, &blk_bh); 2478 if (ret < 0) { 2479 mlog_errno(ret); 2480 goto out; 2481 } 2482 2483 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); 2484 if (ret < 0) { 2485 mlog_errno(ret); 2486 goto out; 2487 } 2488 2489 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2490 blk = le64_to_cpu(xb->xb_blkno); 2491 bit = le16_to_cpu(xb->xb_suballoc_bit); 2492 if (xb->xb_suballoc_loc) 2493 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc); 2494 else 2495 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2496 2497 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2498 EXTENT_ALLOC_SYSTEM_INODE, 2499 le16_to_cpu(xb->xb_suballoc_slot)); 2500 if (!xb_alloc_inode) { 2501 ret = -ENOMEM; 2502 mlog_errno(ret); 2503 goto out; 2504 } 2505 mutex_lock(&xb_alloc_inode->i_mutex); 2506 2507 ret = ocfs2_inode_lock(xb_alloc_inode, 
&xb_alloc_bh, 1); 2508 if (ret < 0) { 2509 mlog_errno(ret); 2510 goto out_mutex; 2511 } 2512 2513 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 2514 if (IS_ERR(handle)) { 2515 ret = PTR_ERR(handle); 2516 mlog_errno(ret); 2517 goto out_unlock; 2518 } 2519 2520 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, 2521 bit, bg_blkno, 1); 2522 if (ret < 0) 2523 mlog_errno(ret); 2524 2525 ocfs2_commit_trans(osb, handle); 2526 out_unlock: 2527 ocfs2_inode_unlock(xb_alloc_inode, 1); 2528 brelse(xb_alloc_bh); 2529 out_mutex: 2530 mutex_unlock(&xb_alloc_inode->i_mutex); 2531 iput(xb_alloc_inode); 2532 out: 2533 brelse(blk_bh); 2534 return ret; 2535 } 2536 2537 /* 2538 * ocfs2_xattr_remove() 2539 * 2540 * Free extended attribute resources associated with this inode. 2541 */ 2542 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) 2543 { 2544 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2545 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2546 struct ocfs2_refcount_tree *ref_tree = NULL; 2547 struct buffer_head *ref_root_bh = NULL; 2548 struct ocfs2_caching_info *ref_ci = NULL; 2549 handle_t *handle; 2550 int ret; 2551 2552 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2553 return 0; 2554 2555 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2556 return 0; 2557 2558 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) { 2559 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), 2560 le64_to_cpu(di->i_refcount_loc), 2561 1, &ref_tree, &ref_root_bh); 2562 if (ret) { 2563 mlog_errno(ret); 2564 goto out; 2565 } 2566 ref_ci = &ref_tree->rf_ci; 2567 2568 } 2569 2570 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2571 ret = ocfs2_xattr_ibody_remove(inode, di_bh, 2572 ref_ci, ref_root_bh); 2573 if (ret < 0) { 2574 mlog_errno(ret); 2575 goto out; 2576 } 2577 } 2578 2579 if (di->i_xattr_loc) { 2580 ret = ocfs2_xattr_free_block(inode, 2581 le64_to_cpu(di->i_xattr_loc), 2582 ref_ci, ref_root_bh); 2583 if (ret < 0) 
{ 2584 mlog_errno(ret); 2585 goto out; 2586 } 2587 } 2588 2589 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 2590 OCFS2_INODE_UPDATE_CREDITS); 2591 if (IS_ERR(handle)) { 2592 ret = PTR_ERR(handle); 2593 mlog_errno(ret); 2594 goto out; 2595 } 2596 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 2597 OCFS2_JOURNAL_ACCESS_WRITE); 2598 if (ret) { 2599 mlog_errno(ret); 2600 goto out_commit; 2601 } 2602 2603 di->i_xattr_loc = 0; 2604 2605 spin_lock(&oi->ip_lock); 2606 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2607 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2608 spin_unlock(&oi->ip_lock); 2609 ocfs2_update_inode_fsync_trans(handle, inode, 0); 2610 2611 ocfs2_journal_dirty(handle, di_bh); 2612 out_commit: 2613 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2614 out: 2615 if (ref_tree) 2616 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); 2617 brelse(ref_root_bh); 2618 return ret; 2619 } 2620 2621 static int ocfs2_xattr_has_space_inline(struct inode *inode, 2622 struct ocfs2_dinode *di) 2623 { 2624 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2625 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; 2626 int free; 2627 2628 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) 2629 return 0; 2630 2631 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 2632 struct ocfs2_inline_data *idata = &di->id2.i_data; 2633 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); 2634 } else if (ocfs2_inode_is_fast_symlink(inode)) { 2635 free = ocfs2_fast_symlink_chars(inode->i_sb) - 2636 le64_to_cpu(di->i_size); 2637 } else { 2638 struct ocfs2_extent_list *el = &di->id2.i_list; 2639 free = (le16_to_cpu(el->l_count) - 2640 le16_to_cpu(el->l_next_free_rec)) * 2641 sizeof(struct ocfs2_extent_rec); 2642 } 2643 if (free >= xattrsize) 2644 return 1; 2645 2646 return 0; 2647 } 2648 2649 /* 2650 * ocfs2_xattr_ibody_find() 2651 * 2652 * Find extended attribute in inode block and 2653 * fill search 
info into struct ocfs2_xattr_search.
 *
 * Returns 0 both when the attribute is found and when it is not; the
 * lookup result is reported through xs->not_found.  A non-zero return is
 * an error from ocfs2_xattr_find_entry() other than -ENODATA.
 */
static int ocfs2_xattr_ibody_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	int ret;
	int has_space = 0;

	/* With the minimum block size, @xs is left untouched. */
	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
		return 0;

	/*
	 * No inline xattr area yet: only continue if one could be created,
	 * so that @xs describes where it would live.
	 */
	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
		down_read(&oi->ip_alloc_sem);
		has_space = ocfs2_xattr_has_space_inline(inode, di);
		up_read(&oi->ip_alloc_sem);
		if (!has_space)
			return 0;
	}

	xs->xattr_bh = xs->inode_bh;
	xs->end = (void *)di + inode->i_sb->s_blocksize;
	/*
	 * An existing area uses the size recorded in the inode; a
	 * prospective one uses the superblock's inline size.
	 */
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
		xs->header = (struct ocfs2_xattr_header *)
			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
	else
		xs->header = (struct ocfs2_xattr_header *)
			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
	xs->base = (void *)xs->header;
	xs->here = xs->header->xh_entries;

	/* Find the named attribute. */
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_xattr_find_entry(name_index, name, xs);
		if (ret && ret != -ENODATA)
			return ret;
		/* 0 when found, -ENODATA when absent. */
		xs->not_found = ret;
	}

	return 0;
}

/*
 * Create the inline xattr area in the inode block @di_bh.
 *
 * Space is taken from the inline data or from the extent list (nothing is
 * adjusted for a fast symlink), i_xattr_inline_size is recorded, and the
 * INLINE_XATTR and HAS_XATTR flags are set in memory and on disk.
 *
 * Returns -ENOSPC when the area does not fit, another negative error if
 * journal access fails, or 0 on success.
 */
static int ocfs2_xattr_ibody_init(struct inode *inode,
				  struct buffer_head *di_bh,
				  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	unsigned int xattrsize = osb->s_xattr_inline_size;

	if (!ocfs2_xattr_has_space_inline(inode, di)) {
		ret = -ENOSPC;
		goto out;
	}

	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Adjust extent record count or inline data size
	 * to reserve space for extended attribute.
	 */
	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		struct ocfs2_inline_data *idata = &di->id2.i_data;
		le16_add_cpu(&idata->id_count, -xattrsize);
	} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
		struct ocfs2_extent_list *el = &di->id2.i_list;
		le16_add_cpu(&el->l_count, -(xattrsize /
					     sizeof(struct ocfs2_extent_rec)));
	}
	di->i_xattr_inline_size = cpu_to_le16(xattrsize);

	/* Keep the in-memory and on-disk feature flags in step. */
	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);

	ocfs2_journal_dirty(ctxt->handle, di_bh);

out:
	return ret;
}

/*
 * ocfs2_xattr_ibody_set()
 *
 * Set, replace or remove an extended attribute into inode block.
2749 * 2750 */ 2751 static int ocfs2_xattr_ibody_set(struct inode *inode, 2752 struct ocfs2_xattr_info *xi, 2753 struct ocfs2_xattr_search *xs, 2754 struct ocfs2_xattr_set_ctxt *ctxt) 2755 { 2756 int ret; 2757 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2758 struct ocfs2_xa_loc loc; 2759 2760 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) 2761 return -ENOSPC; 2762 2763 down_write(&oi->ip_alloc_sem); 2764 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { 2765 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); 2766 if (ret) { 2767 if (ret != -ENOSPC) 2768 mlog_errno(ret); 2769 goto out; 2770 } 2771 } 2772 2773 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, 2774 xs->not_found ? NULL : xs->here); 2775 ret = ocfs2_xa_set(&loc, xi, ctxt); 2776 if (ret) { 2777 if (ret != -ENOSPC) 2778 mlog_errno(ret); 2779 goto out; 2780 } 2781 xs->here = loc.xl_entry; 2782 2783 out: 2784 up_write(&oi->ip_alloc_sem); 2785 2786 return ret; 2787 } 2788 2789 /* 2790 * ocfs2_xattr_block_find() 2791 * 2792 * Find extended attribute in external block and 2793 * fill search info into struct ocfs2_xattr_search. 
*/
static int ocfs2_xattr_block_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	int ret = 0;

	/* No external xattr block: @xs is left untouched. */
	if (!di->i_xattr_loc)
		return ret;

	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
				     &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	/* On success the buffer reference is kept in xs->xattr_bh. */
	xs->xattr_bh = blk_bh;
	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;

	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		xs->header = &xb->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
		xs->here = xs->header->xh_entries;

		ret = ocfs2_xattr_find_entry(name_index, name, xs);
	} else
		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
						   name_index,
						   name, xs);

	/* -ENODATA is a normal "not found"; anything else is an error. */
	if (ret && ret != -ENODATA) {
		xs->xattr_bh = NULL;
		goto cleanup;
	}
	xs->not_found = ret;
	return 0;
cleanup:
	brelse(blk_bh);

	return ret;
}

/*
 * Allocate and initialize a new external xattr block for @inode and link
 * it from the inode block @inode_bh.
 *
 * When @indexed is non-zero the block is set up as an xattr tree root
 * with one cluster and one used extent record.  On success *ret_bh holds
 * the new block's buffer head.  Returns 0 or a negative error code.
 */
static int ocfs2_create_xattr_block(struct inode *inode,
				    struct buffer_head *inode_bh,
				    struct ocfs2_xattr_set_ctxt *ctxt,
				    int indexed,
				    struct buffer_head **ret_bh)
{
	int ret;
	u16 suballoc_bit_start;
	u32 num_got;
	u64 suballoc_loc, first_blkno;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data;
	struct buffer_head *new_bh = NULL;
	struct ocfs2_xattr_block *xblk;

	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
				      inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	/* Claim one metadata block from the reservation in ctxt->meta_ac. */
	ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
				   &suballoc_loc, &suballoc_bit_start,
				   &num_got, &first_blkno);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	new_bh = sb_getblk(inode->i_sb, first_blkno);
	if (!new_bh) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto end;
	}

	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);

	ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
				      new_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret < 0) {
		mlog_errno(ret);
		goto end;
	}

	/* Initialize ocfs2_xattr_block */
	xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
	memset(xblk, 0, inode->i_sb->s_blocksize);
	strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
	xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
	xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
	xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
	xblk->xb_fs_generation =
		cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
	xblk->xb_blkno = cpu_to_le64(first_blkno);
	if (indexed) {
		struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
		xr->xt_clusters = cpu_to_le32(1);
		xr->xt_last_eb_blk = 0;
		xr->xt_list.l_tree_depth = 0;
		xr->xt_list.l_count = cpu_to_le16(
					ocfs2_xattr_recs_per_xb(inode->i_sb));
		xr->xt_list.l_next_free_rec = cpu_to_le16(1);
		xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
	}
	ocfs2_journal_dirty(ctxt->handle, new_bh);

	/* Add it to the inode */
	di->i_xattr_loc = cpu_to_le64(first_blkno);

	/* Keep the in-memory and on-disk feature flags in step. */
	spin_lock(&OCFS2_I(inode)->ip_lock);
	OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
	di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
	spin_unlock(&OCFS2_I(inode)->ip_lock);

	ocfs2_journal_dirty(ctxt->handle, inode_bh);

	/* Hand the reference to the caller; nothing left to release here. */
	*ret_bh = new_bh;
	new_bh = NULL;

end:
	brelse(new_bh);
	return ret;
}

/*
 * ocfs2_xattr_block_set()
 *
 * Set, replace or remove an extended attribute into
external block.
 *
 * A non-indexed xattr block is created when the search has none.  If the
 * attribute does not fit in a non-indexed block, the block is converted
 * to an index block and the attribute is set there instead.
 */
static int ocfs2_xattr_block_set(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs,
				 struct ocfs2_xattr_set_ctxt *ctxt)
{
	struct ocfs2_xattr_block *xb = NULL;
	struct buffer_head *created_bh = NULL;
	struct ocfs2_xa_loc loc;
	int status;

	if (xs->xattr_bh) {
		xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
	} else {
		status = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
						  0, &created_bh);
		if (status) {
			mlog_errno(status);
			return status;
		}

		/* Point the search at the freshly created, empty block. */
		xs->xattr_bh = created_bh;
		xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
		xs->header = &xb->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)xb + inode->i_sb->s_blocksize;
		xs->here = xs->header->xh_entries;
	}

	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
					      xs->not_found ? NULL : xs->here);

		status = ocfs2_xa_set(&loc, xi, ctxt);
		if (status == 0) {
			xs->here = loc.xl_entry;
		} else {
			/* Only a clean "no space" may fall back to an index. */
			if (status != -ENOSPC || ctxt->set_abort)
				return status;

			status = ocfs2_xattr_create_index_block(inode, xs,
								ctxt);
			if (status)
				return status;
		}
	}

	/* The flags are read again: the block may have just been indexed. */
	if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
		status = ocfs2_xattr_set_entry_index_block(inode, xi, xs,
							   ctxt);

	return status;
}

/* Check whether the new xattr can be inserted into the inode.
*/
static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
				       struct ocfs2_xattr_info *xi,
				       struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_entry *last;
	int free, i;
	size_t min_offs = xs->end - xs->base;

	if (!xs->header)
		return 0;

	last = xs->header->xh_entries;

	/* Find the lowest name offset; "last" ends one past the entries. */
	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
		size_t offs = le16_to_cpu(last->xe_name_offset);
		if (offs < min_offs)
			min_offs = offs;
		last += 1;
	}

	/* Gap between the end of the entry array and the lowest name+value. */
	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
	if (free < 0)
		return 0;

	BUG_ON(!xs->not_found);

	if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
		return 1;

	return 0;
}

/*
 * Estimate what setting, replacing or removing @xi may need, given the
 * inode-body search @xis and the xattr-block search @xbs.
 *
 * The estimates are written to *clusters_need, *meta_need and
 * *credits_need; each output pointer may be NULL.  Returns 0 or a negative
 * error code.
 */
static int ocfs2_calc_xattr_set_need(struct inode *inode,
				     struct ocfs2_dinode *di,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xis,
				     struct ocfs2_xattr_search *xbs,
				     int *clusters_need,
				     int *meta_need,
				     int *credits_need)
{
	int ret = 0, old_in_xb = 0;
	int clusters_add = 0, meta_add = 0, credits = 0;
	struct buffer_head *bh = NULL;
	struct ocfs2_xattr_block *xb = NULL;
	struct ocfs2_xattr_entry *xe = NULL;
	struct ocfs2_xattr_value_root *xv = NULL;
	char *base = NULL;
	int name_offset, name_len = 0;
	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
						    xi->xi_value_len);
	u64 value_size;

	/*
	 * Calculate the clusters we need to write.
	 * No matter whether we replace an old one or add a new one,
	 * we need this for writing.
	 */
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		credits += new_clusters *
			   ocfs2_clusters_to_blocks(inode->i_sb, 1);

	/* Brand-new attribute: no old entry to inspect. */
	if (xis->not_found && xbs->not_found) {
		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);

		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
			clusters_add += new_clusters;
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							&def_xv.xv.xr_list);
		}

		goto meta_guess;
	}

	/* Locate the existing entry, either in the inode or in the block. */
	if (!xis->not_found) {
		xe = xis->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		base = xis->base;
		credits += OCFS2_INODE_UPDATE_CREDITS;
	} else {
		int i, block_off = 0;
		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
		xe = xbs->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		i = xbs->here - xbs->header->xh_entries;
		old_in_xb = 1;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			/*
			 * NOTE(review): ret is not checked before base is
			 * computed; it is only returned at "out" unless a
			 * later call overwrites it.
			 */
			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
							bucket_xh(xbs->bucket),
							i, &block_off,
							&name_offset);
			base = bucket_block(xbs->bucket, block_off);
			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		} else {
			base = xbs->base;
			credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
		}
	}

	/*
	 * Deleting an xattr doesn't need metadata or cluster allocation,
	 * so just calculate the credits and return.
	 *
	 * The credits for removing the value tree will be extended
	 * by ocfs2_remove_extent itself.
	 */
	if (!xi->xi_value) {
		if (!ocfs2_xattr_is_local(xe))
			credits += ocfs2_remove_extent_credits(inode->i_sb);

		goto out;
	}

	/* do cluster allocation guess first. */
	value_size = le64_to_cpu(xe->xe_value_size);

	if (old_in_xb) {
		/*
		 * In xattr set, we always try to set the xe in inode first,
		 * so if it can be inserted into inode successfully, the old
		 * one will be removed from the xattr block, and this xattr
		 * will be inserted into inode as a new xattr in inode.
		 */
		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
			clusters_add += new_clusters;
			credits += ocfs2_remove_extent_credits(inode->i_sb) +
				    OCFS2_INODE_UPDATE_CREDITS;
			if (!ocfs2_xattr_is_local(xe))
				credits += ocfs2_calc_extend_credits(
							inode->i_sb,
							&def_xv.xv.xr_list);
			goto out;
		}
	}

	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		/* the new values will be stored outside. */
		u32 old_clusters = 0;

		if (!ocfs2_xattr_is_local(xe)) {
			old_clusters =	ocfs2_clusters_for_bytes(inode->i_sb,
								 value_size);
			xv = (struct ocfs2_xattr_value_root *)
			     (base + name_offset + name_len);
			value_size = OCFS2_XATTR_ROOT_SIZE;
		} else
			xv = &def_xv.xv;

		if (old_clusters >= new_clusters) {
			/* Shrinking or same size: only removal credits. */
			credits += ocfs2_remove_extent_credits(inode->i_sb);
			goto out;
		} else {
			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
			clusters_add += new_clusters - old_clusters;
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     &xv->xr_list);
			if (value_size >= OCFS2_XATTR_ROOT_SIZE)
				goto out;
		}
	} else {
		/*
		 * Now the new value will be stored inside. So if the new
		 * value is smaller than the size of value root or the old
		 * value, we don't need any allocation, otherwise we have
		 * to guess metadata allocation.
		 */
		if ((ocfs2_xattr_is_local(xe) &&
		     (value_size >= xi->xi_value_len)) ||
		    (!ocfs2_xattr_is_local(xe) &&
		     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
			goto out;
	}

meta_guess:
	/* calculate metadata allocation. */
	if (di->i_xattr_loc) {
		/* Read the xattr block here only if the search has none. */
		if (!xbs->xattr_bh) {
			ret = ocfs2_read_xattr_block(inode,
						     le64_to_cpu(di->i_xattr_loc),
						     &bh);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			xb = (struct ocfs2_xattr_block *)bh->b_data;
		} else
			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;

		/*
		 * If there is already an xattr tree, good, we can calculate
		 * like other b-trees. Otherwise we may have the chance of
		 * create a tree, the credit calculation is borrowed from
		 * ocfs2_calc_extend_credits with root_el = NULL. And the
		 * new tree will be cluster based, so no meta is needed.
		 */
		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			struct ocfs2_extent_list *el =
				 &xb->xb_attrs.xb_root.xt_list;
			meta_add += ocfs2_extend_meta_needed(el);
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     el);
		} else
			credits += OCFS2_SUBALLOC_ALLOC + 1;

		/*
		 * This cluster will be used either for new bucket or for
		 * new xattr block.
		 * If the cluster size is the same as the bucket size, one
		 * more is needed since we may need to extend the bucket
		 * also.
		 */
		clusters_add += 1;
		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		if (OCFS2_XATTR_BUCKET_SIZE ==
			OCFS2_SB(inode->i_sb)->s_clustersize) {
			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
			clusters_add += 1;
		}
	} else {
		/* No xattr block yet: account for creating one. */
		credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
			struct ocfs2_extent_list *el = &def_xv.xv.xr_list;
			meta_add += ocfs2_extend_meta_needed(el);
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     el);
		} else {
			meta_add += 1;
		}
	}
out:
	if (clusters_need)
		*clusters_need = clusters_add;
	if (meta_need)
		*meta_need = meta_add;
	if (credits_need)
		*credits_need = credits;
	brelse(bh);
	return ret;
}

/*
 * Prepare @ctxt for a set operation: zero it, initialize its dealloc
 * context, compute the needs with ocfs2_calc_xattr_set_need() and reserve
 * the metadata blocks (plus @extra_meta) and clusters that were computed.
 *
 * *credits receives the journal credit estimate.  On failure any metadata
 * reservation made here is released.  Returns 0 or a negative error code.
 */
static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
				     struct ocfs2_dinode *di,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xis,
				     struct ocfs2_xattr_search *xbs,
				     struct ocfs2_xattr_set_ctxt *ctxt,
				     int extra_meta,
				     int *credits)
{
	int clusters_add, meta_add, ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));

	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);

	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
					&clusters_add, &meta_add, credits);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	meta_add += extra_meta;
	trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add,
					clusters_add, *credits);

	if (meta_add) {
		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
							&ctxt->meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	if (clusters_add) {
		ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
		if (ret)
			mlog_errno(ret);
	}
out:
	if (ret) {
		if (ctxt->meta_ac) {
			ocfs2_free_alloc_context(ctxt->meta_ac);
			ctxt->meta_ac = NULL;
		}

		/*
		 * We cannot have an error and a non null ctxt->data_ac.
		 */
	}

	return ret;
}

/*
 * Perform the set, replace or remove described by @xi inside the
 * transaction in ctxt->handle.
 *
 * Removal is applied wherever the attribute was found.  A set is tried in
 * the inode body first and falls back to the external block on -ENOSPC;
 * a stale copy left in the other location is then removed.  On success the
 * inode ctime is updated.  Note that @xi is modified (value cleared) when
 * a stale copy has to be removed.
 */
static int __ocfs2_xattr_set_handle(struct inode *inode,
				    struct ocfs2_dinode *di,
				    struct ocfs2_xattr_info *xi,
				    struct ocfs2_xattr_search *xis,
				    struct ocfs2_xattr_search *xbs,
				    struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret = 0, credits, old_found;

	if (!xi->xi_value) {
		/* Remove existing extended attribute */
		if (!xis->not_found)
			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		else if (!xbs->not_found)
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
	} else {
		/* We always try to set extended attribute into inode first*/
		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		if (!ret && !xbs->not_found) {
			/*
			 * If succeed and that extended attribute existing in
			 * external block, then we will remove it.
			 */
			xi->xi_value = NULL;
			xi->xi_value_len = 0;

			/*
			 * Temporarily mark the inode search as "not found"
			 * so the credit calculation looks at the block copy.
			 */
			old_found = xis->not_found;
			xis->not_found = -ENODATA;
			ret = ocfs2_calc_xattr_set_need(inode,
							di,
							xi,
							xis,
							xbs,
							NULL,
							NULL,
							&credits);
			xis->not_found = old_found;
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_extend_trans(ctxt->handle, credits);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
		} else if ((ret == -ENOSPC) && !ctxt->set_abort) {
			/* The xattr block exists but was not searched yet. */
			if (di->i_xattr_loc && !xbs->xattr_bh) {
				ret = ocfs2_xattr_block_find(inode,
							     xi->xi_name_index,
							     xi->xi_name, xbs);
				if (ret)
					goto out;

				old_found = xis->not_found;
				xis->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				xis->not_found = old_found;
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
			}
			/*
			 * If no space in inode, we will set extended attribute
			 * into external block.
			 */
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
			if (ret)
				goto out;
			if (!xis->not_found) {
				/*
				 * If succeed and that extended attribute
				 * existing in inode, we will remove it.
				 */
				xi->xi_value = NULL;
				xi->xi_value_len = 0;
				xbs->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
				ret = ocfs2_xattr_ibody_set(inode, xi,
							    xis, ctxt);
			}
		}
	}

	if (!ret) {
		/* Update inode ctime. */
		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
					      xis->inode_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		inode->i_ctime = CURRENT_TIME;
		di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
		di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
	}
out:
	return ret;
}

/*
 * This function is only called during inode creation
 * to initialize the security/acl xattrs of the new inode.
 * All transaction credits have been reserved in mknod.
 *
 * The caller supplies the running transaction and the allocation
 * contexts.  Returns -EOPNOTSUPP when the filesystem has no xattr
 * support, otherwise 0 or a negative error code.
 */
int ocfs2_xattr_set_handle(handle_t *handle,
			   struct inode *inode,
			   struct buffer_head *di_bh,
			   int name_index,
			   const char *name,
			   const void *value,
			   size_t value_len,
			   int flags,
			   struct ocfs2_alloc_context *meta_ac,
			   struct ocfs2_alloc_context *data_ac)
{
	struct ocfs2_dinode *di;
	int ret;

	struct ocfs2_xattr_info xi = {
		.xi_name_index = name_index,
		.xi_name = name,
		.xi_name_len = strlen(name),
		.xi_value = value,
		.xi_value_len = value_len,
	};

	/* Both searches start out as "not found". */
	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_set_ctxt ctxt = {
		.handle = handle,
		.meta_ac = meta_ac,
		.data_ac = data_ac,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	/*
	 * In extreme situation, may need xattr bucket when
	 * block size is too small. And we have already reserved
	 * the credits for bucket in mknod.
	 */
	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
		xbs.bucket = ocfs2_xattr_bucket_new(inode);
		if (!xbs.bucket) {
			mlog_errno(-ENOMEM);
			return -ENOMEM;
		}
	}

	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_write(&OCFS2_I(inode)->ip_xattr_sem);

	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
	if (ret)
		goto cleanup;
	/* Search the external block only if the inode body had no match. */
	if (xis.not_found) {
		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
		if (ret)
			goto cleanup;
	}

	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);

cleanup:
	up_write(&OCFS2_I(inode)->ip_xattr_sem);
	brelse(xbs.xattr_bh);
	ocfs2_xattr_bucket_free(xbs.bucket);

	return ret;
}

/*
 * ocfs2_xattr_set()
 *
 * Set, replace or remove an extended attribute for this inode.
 * value is NULL to remove an existing extended attribute, else either
 * create or replace an extended attribute.
3508 */ 3509 int ocfs2_xattr_set(struct inode *inode, 3510 int name_index, 3511 const char *name, 3512 const void *value, 3513 size_t value_len, 3514 int flags) 3515 { 3516 struct buffer_head *di_bh = NULL; 3517 struct ocfs2_dinode *di; 3518 int ret, credits, ref_meta = 0, ref_credits = 0; 3519 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3520 struct inode *tl_inode = osb->osb_tl_inode; 3521 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; 3522 struct ocfs2_refcount_tree *ref_tree = NULL; 3523 3524 struct ocfs2_xattr_info xi = { 3525 .xi_name_index = name_index, 3526 .xi_name = name, 3527 .xi_name_len = strlen(name), 3528 .xi_value = value, 3529 .xi_value_len = value_len, 3530 }; 3531 3532 struct ocfs2_xattr_search xis = { 3533 .not_found = -ENODATA, 3534 }; 3535 3536 struct ocfs2_xattr_search xbs = { 3537 .not_found = -ENODATA, 3538 }; 3539 3540 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3541 return -EOPNOTSUPP; 3542 3543 /* 3544 * Only xbs will be used on indexed trees. xis doesn't need a 3545 * bucket. 3546 */ 3547 xbs.bucket = ocfs2_xattr_bucket_new(inode); 3548 if (!xbs.bucket) { 3549 mlog_errno(-ENOMEM); 3550 return -ENOMEM; 3551 } 3552 3553 ret = ocfs2_inode_lock(inode, &di_bh, 1); 3554 if (ret < 0) { 3555 mlog_errno(ret); 3556 goto cleanup_nolock; 3557 } 3558 xis.inode_bh = xbs.inode_bh = di_bh; 3559 di = (struct ocfs2_dinode *)di_bh->b_data; 3560 3561 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3562 /* 3563 * Scan inode and external block to find the same name 3564 * extended attribute and collect search information. 
3565 */ 3566 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3567 if (ret) 3568 goto cleanup; 3569 if (xis.not_found) { 3570 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); 3571 if (ret) 3572 goto cleanup; 3573 } 3574 3575 if (xis.not_found && xbs.not_found) { 3576 ret = -ENODATA; 3577 if (flags & XATTR_REPLACE) 3578 goto cleanup; 3579 ret = 0; 3580 if (!value) 3581 goto cleanup; 3582 } else { 3583 ret = -EEXIST; 3584 if (flags & XATTR_CREATE) 3585 goto cleanup; 3586 } 3587 3588 /* Check whether the value is refcounted and do some preparation. */ 3589 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL && 3590 (!xis.not_found || !xbs.not_found)) { 3591 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3592 &xis, &xbs, &ref_tree, 3593 &ref_meta, &ref_credits); 3594 if (ret) { 3595 mlog_errno(ret); 3596 goto cleanup; 3597 } 3598 } 3599 3600 mutex_lock(&tl_inode->i_mutex); 3601 3602 if (ocfs2_truncate_log_needs_flush(osb)) { 3603 ret = __ocfs2_flush_truncate_log(osb); 3604 if (ret < 0) { 3605 mutex_unlock(&tl_inode->i_mutex); 3606 mlog_errno(ret); 3607 goto cleanup; 3608 } 3609 } 3610 mutex_unlock(&tl_inode->i_mutex); 3611 3612 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3613 &xbs, &ctxt, ref_meta, &credits); 3614 if (ret) { 3615 mlog_errno(ret); 3616 goto cleanup; 3617 } 3618 3619 /* we need to update inode's ctime field, so add credit for it. 
*/ 3620 credits += OCFS2_INODE_UPDATE_CREDITS; 3621 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 3622 if (IS_ERR(ctxt.handle)) { 3623 ret = PTR_ERR(ctxt.handle); 3624 mlog_errno(ret); 3625 goto out_free_ac; 3626 } 3627 3628 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3629 ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0); 3630 3631 ocfs2_commit_trans(osb, ctxt.handle); 3632 3633 out_free_ac: 3634 if (ctxt.data_ac) 3635 ocfs2_free_alloc_context(ctxt.data_ac); 3636 if (ctxt.meta_ac) 3637 ocfs2_free_alloc_context(ctxt.meta_ac); 3638 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3639 ocfs2_schedule_truncate_log_flush(osb, 1); 3640 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3641 3642 cleanup: 3643 if (ref_tree) 3644 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3645 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3646 if (!value && !ret) { 3647 ret = ocfs2_try_remove_refcount_tree(inode, di_bh); 3648 if (ret) 3649 mlog_errno(ret); 3650 } 3651 ocfs2_inode_unlock(inode, 1); 3652 cleanup_nolock: 3653 brelse(di_bh); 3654 brelse(xbs.xattr_bh); 3655 ocfs2_xattr_bucket_free(xbs.bucket); 3656 3657 return ret; 3658 } 3659 3660 /* 3661 * Find the xattr extent rec which may contains name_hash. 3662 * e_cpos will be the first name hash of the xattr rec. 3663 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. 
3664 */ 3665 static int ocfs2_xattr_get_rec(struct inode *inode, 3666 u32 name_hash, 3667 u64 *p_blkno, 3668 u32 *e_cpos, 3669 u32 *num_clusters, 3670 struct ocfs2_extent_list *el) 3671 { 3672 int ret = 0, i; 3673 struct buffer_head *eb_bh = NULL; 3674 struct ocfs2_extent_block *eb; 3675 struct ocfs2_extent_rec *rec = NULL; 3676 u64 e_blkno = 0; 3677 3678 if (el->l_tree_depth) { 3679 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, 3680 &eb_bh); 3681 if (ret) { 3682 mlog_errno(ret); 3683 goto out; 3684 } 3685 3686 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 3687 el = &eb->h_list; 3688 3689 if (el->l_tree_depth) { 3690 ret = ocfs2_error(inode->i_sb, 3691 "Inode %lu has non zero tree depth in xattr tree block %llu\n", 3692 inode->i_ino, 3693 (unsigned long long)eb_bh->b_blocknr); 3694 goto out; 3695 } 3696 } 3697 3698 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 3699 rec = &el->l_recs[i]; 3700 3701 if (le32_to_cpu(rec->e_cpos) <= name_hash) { 3702 e_blkno = le64_to_cpu(rec->e_blkno); 3703 break; 3704 } 3705 } 3706 3707 if (!e_blkno) { 3708 ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n", 3709 inode->i_ino, 3710 le32_to_cpu(rec->e_cpos), 3711 ocfs2_rec_clusters(el, rec)); 3712 goto out; 3713 } 3714 3715 *p_blkno = le64_to_cpu(rec->e_blkno); 3716 *num_clusters = le16_to_cpu(rec->e_leaf_clusters); 3717 if (e_cpos) 3718 *e_cpos = le32_to_cpu(rec->e_cpos); 3719 out: 3720 brelse(eb_bh); 3721 return ret; 3722 } 3723 3724 typedef int (xattr_bucket_func)(struct inode *inode, 3725 struct ocfs2_xattr_bucket *bucket, 3726 void *para); 3727 3728 static int ocfs2_find_xe_in_bucket(struct inode *inode, 3729 struct ocfs2_xattr_bucket *bucket, 3730 int name_index, 3731 const char *name, 3732 u32 name_hash, 3733 u16 *xe_index, 3734 int *found) 3735 { 3736 int i, ret = 0, cmp = 1, block_off, new_offset; 3737 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 3738 size_t name_len = strlen(name); 3739 struct 
ocfs2_xattr_entry *xe = NULL; 3740 char *xe_name; 3741 3742 /* 3743 * We don't use binary search in the bucket because there 3744 * may be multiple entries with the same name hash. 3745 */ 3746 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 3747 xe = &xh->xh_entries[i]; 3748 3749 if (name_hash > le32_to_cpu(xe->xe_name_hash)) 3750 continue; 3751 else if (name_hash < le32_to_cpu(xe->xe_name_hash)) 3752 break; 3753 3754 cmp = name_index - ocfs2_xattr_get_type(xe); 3755 if (!cmp) 3756 cmp = name_len - xe->xe_name_len; 3757 if (cmp) 3758 continue; 3759 3760 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 3761 xh, 3762 i, 3763 &block_off, 3764 &new_offset); 3765 if (ret) { 3766 mlog_errno(ret); 3767 break; 3768 } 3769 3770 3771 xe_name = bucket_block(bucket, block_off) + new_offset; 3772 if (!memcmp(name, xe_name, name_len)) { 3773 *xe_index = i; 3774 *found = 1; 3775 ret = 0; 3776 break; 3777 } 3778 } 3779 3780 return ret; 3781 } 3782 3783 /* 3784 * Find the specified xattr entry in a series of buckets. 3785 * This series start from p_blkno and last for num_clusters. 3786 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains 3787 * the num of the valid buckets. 3788 * 3789 * Return the buffer_head this xattr should reside in. And if the xattr's 3790 * hash is in the gap of 2 buckets, return the lower bucket. 
3791 */ 3792 static int ocfs2_xattr_bucket_find(struct inode *inode, 3793 int name_index, 3794 const char *name, 3795 u32 name_hash, 3796 u64 p_blkno, 3797 u32 first_hash, 3798 u32 num_clusters, 3799 struct ocfs2_xattr_search *xs) 3800 { 3801 int ret, found = 0; 3802 struct ocfs2_xattr_header *xh = NULL; 3803 struct ocfs2_xattr_entry *xe = NULL; 3804 u16 index = 0; 3805 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3806 int low_bucket = 0, bucket, high_bucket; 3807 struct ocfs2_xattr_bucket *search; 3808 u32 last_hash; 3809 u64 blkno, lower_blkno = 0; 3810 3811 search = ocfs2_xattr_bucket_new(inode); 3812 if (!search) { 3813 ret = -ENOMEM; 3814 mlog_errno(ret); 3815 goto out; 3816 } 3817 3818 ret = ocfs2_read_xattr_bucket(search, p_blkno); 3819 if (ret) { 3820 mlog_errno(ret); 3821 goto out; 3822 } 3823 3824 xh = bucket_xh(search); 3825 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 3826 while (low_bucket <= high_bucket) { 3827 ocfs2_xattr_bucket_relse(search); 3828 3829 bucket = (low_bucket + high_bucket) / 2; 3830 blkno = p_blkno + bucket * blk_per_bucket; 3831 ret = ocfs2_read_xattr_bucket(search, blkno); 3832 if (ret) { 3833 mlog_errno(ret); 3834 goto out; 3835 } 3836 3837 xh = bucket_xh(search); 3838 xe = &xh->xh_entries[0]; 3839 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3840 high_bucket = bucket - 1; 3841 continue; 3842 } 3843 3844 /* 3845 * Check whether the hash of the last entry in our 3846 * bucket is larger than the search one. for an empty 3847 * bucket, the last one is also the first one. 3848 */ 3849 if (xh->xh_count) 3850 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; 3851 3852 last_hash = le32_to_cpu(xe->xe_name_hash); 3853 3854 /* record lower_blkno which may be the insert place. */ 3855 lower_blkno = blkno; 3856 3857 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3858 low_bucket = bucket + 1; 3859 continue; 3860 } 3861 3862 /* the searched xattr should reside in this bucket if exists. 
*/ 3863 ret = ocfs2_find_xe_in_bucket(inode, search, 3864 name_index, name, name_hash, 3865 &index, &found); 3866 if (ret) { 3867 mlog_errno(ret); 3868 goto out; 3869 } 3870 break; 3871 } 3872 3873 /* 3874 * Record the bucket we have found. 3875 * When the xattr's hash value is in the gap of 2 buckets, we will 3876 * always set it to the previous bucket. 3877 */ 3878 if (!lower_blkno) 3879 lower_blkno = p_blkno; 3880 3881 /* This should be in cache - we just read it during the search */ 3882 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); 3883 if (ret) { 3884 mlog_errno(ret); 3885 goto out; 3886 } 3887 3888 xs->header = bucket_xh(xs->bucket); 3889 xs->base = bucket_block(xs->bucket, 0); 3890 xs->end = xs->base + inode->i_sb->s_blocksize; 3891 3892 if (found) { 3893 xs->here = &xs->header->xh_entries[index]; 3894 trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno, 3895 name, name_index, name_hash, 3896 (unsigned long long)bucket_blkno(xs->bucket), 3897 index); 3898 } else 3899 ret = -ENODATA; 3900 3901 out: 3902 ocfs2_xattr_bucket_free(search); 3903 return ret; 3904 } 3905 3906 static int ocfs2_xattr_index_block_find(struct inode *inode, 3907 struct buffer_head *root_bh, 3908 int name_index, 3909 const char *name, 3910 struct ocfs2_xattr_search *xs) 3911 { 3912 int ret; 3913 struct ocfs2_xattr_block *xb = 3914 (struct ocfs2_xattr_block *)root_bh->b_data; 3915 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 3916 struct ocfs2_extent_list *el = &xb_root->xt_list; 3917 u64 p_blkno = 0; 3918 u32 first_hash, num_clusters = 0; 3919 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 3920 3921 if (le16_to_cpu(el->l_next_free_rec) == 0) 3922 return -ENODATA; 3923 3924 trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno, 3925 name, name_index, name_hash, 3926 (unsigned long long)root_bh->b_blocknr, 3927 -1); 3928 3929 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3930 &num_clusters, el); 3931 if (ret) { 
3932 mlog_errno(ret); 3933 goto out; 3934 } 3935 3936 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 3937 3938 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno, 3939 name, name_index, first_hash, 3940 (unsigned long long)p_blkno, 3941 num_clusters); 3942 3943 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3944 p_blkno, first_hash, num_clusters, xs); 3945 3946 out: 3947 return ret; 3948 } 3949 3950 static int ocfs2_iterate_xattr_buckets(struct inode *inode, 3951 u64 blkno, 3952 u32 clusters, 3953 xattr_bucket_func *func, 3954 void *para) 3955 { 3956 int i, ret = 0; 3957 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3958 u32 num_buckets = clusters * bpc; 3959 struct ocfs2_xattr_bucket *bucket; 3960 3961 bucket = ocfs2_xattr_bucket_new(inode); 3962 if (!bucket) { 3963 mlog_errno(-ENOMEM); 3964 return -ENOMEM; 3965 } 3966 3967 trace_ocfs2_iterate_xattr_buckets( 3968 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3969 (unsigned long long)blkno, clusters); 3970 3971 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 3972 ret = ocfs2_read_xattr_bucket(bucket, blkno); 3973 if (ret) { 3974 mlog_errno(ret); 3975 break; 3976 } 3977 3978 /* 3979 * The real bucket num in this series of blocks is stored 3980 * in the 1st bucket. 
3981 */ 3982 if (i == 0) 3983 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 3984 3985 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno, 3986 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 3987 if (func) { 3988 ret = func(inode, bucket, para); 3989 if (ret && ret != -ERANGE) 3990 mlog_errno(ret); 3991 /* Fall through to bucket_relse() */ 3992 } 3993 3994 ocfs2_xattr_bucket_relse(bucket); 3995 if (ret) 3996 break; 3997 } 3998 3999 ocfs2_xattr_bucket_free(bucket); 4000 return ret; 4001 } 4002 4003 struct ocfs2_xattr_tree_list { 4004 char *buffer; 4005 size_t buffer_size; 4006 size_t result; 4007 }; 4008 4009 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, 4010 struct ocfs2_xattr_header *xh, 4011 int index, 4012 int *block_off, 4013 int *new_offset) 4014 { 4015 u16 name_offset; 4016 4017 if (index < 0 || index >= le16_to_cpu(xh->xh_count)) 4018 return -EINVAL; 4019 4020 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 4021 4022 *block_off = name_offset >> sb->s_blocksize_bits; 4023 *new_offset = name_offset % sb->s_blocksize; 4024 4025 return 0; 4026 } 4027 4028 static int ocfs2_list_xattr_bucket(struct inode *inode, 4029 struct ocfs2_xattr_bucket *bucket, 4030 void *para) 4031 { 4032 int ret = 0, type; 4033 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; 4034 int i, block_off, new_offset; 4035 const char *prefix, *name; 4036 4037 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { 4038 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; 4039 type = ocfs2_xattr_get_type(entry); 4040 prefix = ocfs2_xattr_prefix(type); 4041 4042 if (prefix) { 4043 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 4044 bucket_xh(bucket), 4045 i, 4046 &block_off, 4047 &new_offset); 4048 if (ret) 4049 break; 4050 4051 name = (const char *)bucket_block(bucket, block_off) + 4052 new_offset; 4053 ret = ocfs2_xattr_list_entry(xl->buffer, 4054 xl->buffer_size, 
4055 &xl->result, 4056 prefix, name, 4057 entry->xe_name_len); 4058 if (ret) 4059 break; 4060 } 4061 } 4062 4063 return ret; 4064 } 4065 4066 static int ocfs2_iterate_xattr_index_block(struct inode *inode, 4067 struct buffer_head *blk_bh, 4068 xattr_tree_rec_func *rec_func, 4069 void *para) 4070 { 4071 struct ocfs2_xattr_block *xb = 4072 (struct ocfs2_xattr_block *)blk_bh->b_data; 4073 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; 4074 int ret = 0; 4075 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 4076 u64 p_blkno = 0; 4077 4078 if (!el->l_next_free_rec || !rec_func) 4079 return 0; 4080 4081 while (name_hash > 0) { 4082 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 4083 &e_cpos, &num_clusters, el); 4084 if (ret) { 4085 mlog_errno(ret); 4086 break; 4087 } 4088 4089 ret = rec_func(inode, blk_bh, p_blkno, e_cpos, 4090 num_clusters, para); 4091 if (ret) { 4092 if (ret != -ERANGE) 4093 mlog_errno(ret); 4094 break; 4095 } 4096 4097 if (e_cpos == 0) 4098 break; 4099 4100 name_hash = e_cpos - 1; 4101 } 4102 4103 return ret; 4104 4105 } 4106 4107 static int ocfs2_list_xattr_tree_rec(struct inode *inode, 4108 struct buffer_head *root_bh, 4109 u64 blkno, u32 cpos, u32 len, void *para) 4110 { 4111 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 4112 ocfs2_list_xattr_bucket, para); 4113 } 4114 4115 static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 4116 struct buffer_head *blk_bh, 4117 char *buffer, 4118 size_t buffer_size) 4119 { 4120 int ret; 4121 struct ocfs2_xattr_tree_list xl = { 4122 .buffer = buffer, 4123 .buffer_size = buffer_size, 4124 .result = 0, 4125 }; 4126 4127 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, 4128 ocfs2_list_xattr_tree_rec, &xl); 4129 if (ret) { 4130 mlog_errno(ret); 4131 goto out; 4132 } 4133 4134 ret = xl.result; 4135 out: 4136 return ret; 4137 } 4138 4139 static int cmp_xe(const void *a, const void *b) 4140 { 4141 const struct ocfs2_xattr_entry *l = a, *r = b; 4142 u32 l_hash = 
le32_to_cpu(l->xe_name_hash); 4143 u32 r_hash = le32_to_cpu(r->xe_name_hash); 4144 4145 if (l_hash > r_hash) 4146 return 1; 4147 if (l_hash < r_hash) 4148 return -1; 4149 return 0; 4150 } 4151 4152 static void swap_xe(void *a, void *b, int size) 4153 { 4154 struct ocfs2_xattr_entry *l = a, *r = b, tmp; 4155 4156 tmp = *l; 4157 memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); 4158 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); 4159 } 4160 4161 /* 4162 * When the ocfs2_xattr_block is filled up, new bucket will be created 4163 * and all the xattr entries will be moved to the new bucket. 4164 * The header goes at the start of the bucket, and the names+values are 4165 * filled from the end. This is why *target starts as the last buffer. 4166 * Note: we need to sort the entries since they are not saved in order 4167 * in the ocfs2_xattr_block. 4168 */ 4169 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 4170 struct buffer_head *xb_bh, 4171 struct ocfs2_xattr_bucket *bucket) 4172 { 4173 int i, blocksize = inode->i_sb->s_blocksize; 4174 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4175 u16 offset, size, off_change; 4176 struct ocfs2_xattr_entry *xe; 4177 struct ocfs2_xattr_block *xb = 4178 (struct ocfs2_xattr_block *)xb_bh->b_data; 4179 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 4180 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 4181 u16 count = le16_to_cpu(xb_xh->xh_count); 4182 char *src = xb_bh->b_data; 4183 char *target = bucket_block(bucket, blks - 1); 4184 4185 trace_ocfs2_cp_xattr_block_to_bucket_begin( 4186 (unsigned long long)xb_bh->b_blocknr, 4187 (unsigned long long)bucket_blkno(bucket)); 4188 4189 for (i = 0; i < blks; i++) 4190 memset(bucket_block(bucket, i), 0, blocksize); 4191 4192 /* 4193 * Since the xe_name_offset is based on ocfs2_xattr_header, 4194 * there is a offset change corresponding to the change of 4195 * ocfs2_xattr_header's position. 
4196 */ 4197 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4198 xe = &xb_xh->xh_entries[count - 1]; 4199 offset = le16_to_cpu(xe->xe_name_offset) + off_change; 4200 size = blocksize - offset; 4201 4202 /* copy all the names and values. */ 4203 memcpy(target + offset, src + offset, size); 4204 4205 /* Init new header now. */ 4206 xh->xh_count = xb_xh->xh_count; 4207 xh->xh_num_buckets = cpu_to_le16(1); 4208 xh->xh_name_value_len = cpu_to_le16(size); 4209 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 4210 4211 /* copy all the entries. */ 4212 target = bucket_block(bucket, 0); 4213 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 4214 size = count * sizeof(struct ocfs2_xattr_entry); 4215 memcpy(target + offset, (char *)xb_xh + offset, size); 4216 4217 /* Change the xe offset for all the xe because of the move. */ 4218 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + 4219 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 4220 for (i = 0; i < count; i++) 4221 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4222 4223 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); 4224 4225 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4226 cmp_xe, swap_xe); 4227 } 4228 4229 /* 4230 * After we move xattr from block to index btree, we have to 4231 * update ocfs2_xattr_search to the new xe and base. 4232 * 4233 * When the entry is in xattr block, xattr_bh indicates the storage place. 4234 * While if the entry is in index b-tree, "bucket" indicates the 4235 * real place of the xattr. 
4236 */ 4237 static void ocfs2_xattr_update_xattr_search(struct inode *inode, 4238 struct ocfs2_xattr_search *xs, 4239 struct buffer_head *old_bh) 4240 { 4241 char *buf = old_bh->b_data; 4242 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 4243 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 4244 int i; 4245 4246 xs->header = bucket_xh(xs->bucket); 4247 xs->base = bucket_block(xs->bucket, 0); 4248 xs->end = xs->base + inode->i_sb->s_blocksize; 4249 4250 if (xs->not_found) 4251 return; 4252 4253 i = xs->here - old_xh->xh_entries; 4254 xs->here = &xs->header->xh_entries[i]; 4255 } 4256 4257 static int ocfs2_xattr_create_index_block(struct inode *inode, 4258 struct ocfs2_xattr_search *xs, 4259 struct ocfs2_xattr_set_ctxt *ctxt) 4260 { 4261 int ret; 4262 u32 bit_off, len; 4263 u64 blkno; 4264 handle_t *handle = ctxt->handle; 4265 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4266 struct buffer_head *xb_bh = xs->xattr_bh; 4267 struct ocfs2_xattr_block *xb = 4268 (struct ocfs2_xattr_block *)xb_bh->b_data; 4269 struct ocfs2_xattr_tree_root *xr; 4270 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4271 4272 trace_ocfs2_xattr_create_index_block_begin( 4273 (unsigned long long)xb_bh->b_blocknr); 4274 4275 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4276 BUG_ON(!xs->bucket); 4277 4278 /* 4279 * XXX: 4280 * We can use this lock for now, and maybe move to a dedicated mutex 4281 * if performance becomes a problem later. 
4282 */ 4283 down_write(&oi->ip_alloc_sem); 4284 4285 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, 4286 OCFS2_JOURNAL_ACCESS_WRITE); 4287 if (ret) { 4288 mlog_errno(ret); 4289 goto out; 4290 } 4291 4292 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 4293 1, 1, &bit_off, &len); 4294 if (ret) { 4295 mlog_errno(ret); 4296 goto out; 4297 } 4298 4299 /* 4300 * The bucket may spread in many blocks, and 4301 * we will only touch the 1st block and the last block 4302 * in the whole bucket(one for entry and one for data). 4303 */ 4304 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4305 4306 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); 4307 4308 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1); 4309 if (ret) { 4310 mlog_errno(ret); 4311 goto out; 4312 } 4313 4314 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, 4315 OCFS2_JOURNAL_ACCESS_CREATE); 4316 if (ret) { 4317 mlog_errno(ret); 4318 goto out; 4319 } 4320 4321 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); 4322 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); 4323 4324 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); 4325 4326 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 4327 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 4328 offsetof(struct ocfs2_xattr_block, xb_attrs)); 4329 4330 xr = &xb->xb_attrs.xb_root; 4331 xr->xt_clusters = cpu_to_le32(1); 4332 xr->xt_last_eb_blk = 0; 4333 xr->xt_list.l_tree_depth = 0; 4334 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); 4335 xr->xt_list.l_next_free_rec = cpu_to_le16(1); 4336 4337 xr->xt_list.l_recs[0].e_cpos = 0; 4338 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 4339 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 4340 4341 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 4342 4343 ocfs2_journal_dirty(handle, xb_bh); 4344 4345 out: 4346 up_write(&oi->ip_alloc_sem); 4347 4348 return ret; 4349 } 4350 4351 static int 
cmp_xe_offset(const void *a, const void *b) 4352 { 4353 const struct ocfs2_xattr_entry *l = a, *r = b; 4354 u32 l_name_offset = le16_to_cpu(l->xe_name_offset); 4355 u32 r_name_offset = le16_to_cpu(r->xe_name_offset); 4356 4357 if (l_name_offset < r_name_offset) 4358 return 1; 4359 if (l_name_offset > r_name_offset) 4360 return -1; 4361 return 0; 4362 } 4363 4364 /* 4365 * defrag a xattr bucket if we find that the bucket has some 4366 * holes beteen name/value pairs. 4367 * We will move all the name/value pairs to the end of the bucket 4368 * so that we can spare some space for insertion. 4369 */ 4370 static int ocfs2_defrag_xattr_bucket(struct inode *inode, 4371 handle_t *handle, 4372 struct ocfs2_xattr_bucket *bucket) 4373 { 4374 int ret, i; 4375 size_t end, offset, len; 4376 struct ocfs2_xattr_header *xh; 4377 char *entries, *buf, *bucket_buf = NULL; 4378 u64 blkno = bucket_blkno(bucket); 4379 u16 xh_free_start; 4380 size_t blocksize = inode->i_sb->s_blocksize; 4381 struct ocfs2_xattr_entry *xe; 4382 4383 /* 4384 * In order to make the operation more efficient and generic, 4385 * we copy all the blocks into a contiguous memory and do the 4386 * defragment there, so if anything is error, we will not touch 4387 * the real block. 
4388 */ 4389 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); 4390 if (!bucket_buf) { 4391 ret = -EIO; 4392 goto out; 4393 } 4394 4395 buf = bucket_buf; 4396 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4397 memcpy(buf, bucket_block(bucket, i), blocksize); 4398 4399 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 4400 OCFS2_JOURNAL_ACCESS_WRITE); 4401 if (ret < 0) { 4402 mlog_errno(ret); 4403 goto out; 4404 } 4405 4406 xh = (struct ocfs2_xattr_header *)bucket_buf; 4407 entries = (char *)xh->xh_entries; 4408 xh_free_start = le16_to_cpu(xh->xh_free_start); 4409 4410 trace_ocfs2_defrag_xattr_bucket( 4411 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4412 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4413 4414 /* 4415 * sort all the entries by their offset. 4416 * the largest will be the first, so that we can 4417 * move them to the end one by one. 4418 */ 4419 sort(entries, le16_to_cpu(xh->xh_count), 4420 sizeof(struct ocfs2_xattr_entry), 4421 cmp_xe_offset, swap_xe); 4422 4423 /* Move all name/values to the end of the bucket. */ 4424 xe = xh->xh_entries; 4425 end = OCFS2_XATTR_BUCKET_SIZE; 4426 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { 4427 offset = le16_to_cpu(xe->xe_name_offset); 4428 len = namevalue_size_xe(xe); 4429 4430 /* 4431 * We must make sure that the name/value pair 4432 * exist in the same block. So adjust end to 4433 * the previous block end if needed. 
4434 */ 4435 if (((end - len) / blocksize != 4436 (end - 1) / blocksize)) 4437 end = end - end % blocksize; 4438 4439 if (end > offset + len) { 4440 memmove(bucket_buf + end - len, 4441 bucket_buf + offset, len); 4442 xe->xe_name_offset = cpu_to_le16(end - len); 4443 } 4444 4445 mlog_bug_on_msg(end < offset + len, "Defrag check failed for " 4446 "bucket %llu\n", (unsigned long long)blkno); 4447 4448 end -= len; 4449 } 4450 4451 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " 4452 "bucket %llu\n", (unsigned long long)blkno); 4453 4454 if (xh_free_start == end) 4455 goto out; 4456 4457 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 4458 xh->xh_free_start = cpu_to_le16(end); 4459 4460 /* sort the entries by their name_hash. */ 4461 sort(entries, le16_to_cpu(xh->xh_count), 4462 sizeof(struct ocfs2_xattr_entry), 4463 cmp_xe, swap_xe); 4464 4465 buf = bucket_buf; 4466 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) 4467 memcpy(bucket_block(bucket, i), buf, blocksize); 4468 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 4469 4470 out: 4471 kfree(bucket_buf); 4472 return ret; 4473 } 4474 4475 /* 4476 * prev_blkno points to the start of an existing extent. new_blkno 4477 * points to a newly allocated extent. Because we know each of our 4478 * clusters contains more than bucket, we can easily split one cluster 4479 * at a bucket boundary. So we take the last cluster of the existing 4480 * extent and split it down the middle. We move the last half of the 4481 * buckets in the last cluster of the existing extent over to the new 4482 * extent. 4483 * 4484 * first_bh is the buffer at prev_blkno so we can update the existing 4485 * extent's bucket count. header_bh is the bucket were we were hoping 4486 * to insert our xattr. If the bucket move places the target in the new 4487 * extent, we'll update first_bh and header_bh after modifying the old 4488 * extent. 
4489 * 4490 * first_hash will be set as the 1st xe's name_hash in the new extent. 4491 */ 4492 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 4493 handle_t *handle, 4494 struct ocfs2_xattr_bucket *first, 4495 struct ocfs2_xattr_bucket *target, 4496 u64 new_blkno, 4497 u32 num_clusters, 4498 u32 *first_hash) 4499 { 4500 int ret; 4501 struct super_block *sb = inode->i_sb; 4502 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb); 4503 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); 4504 int to_move = num_buckets / 2; 4505 u64 src_blkno; 4506 u64 last_cluster_blkno = bucket_blkno(first) + 4507 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1)); 4508 4509 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4510 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4511 4512 trace_ocfs2_mv_xattr_bucket_cross_cluster( 4513 (unsigned long long)last_cluster_blkno, 4514 (unsigned long long)new_blkno); 4515 4516 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4517 last_cluster_blkno, new_blkno, 4518 to_move, first_hash); 4519 if (ret) { 4520 mlog_errno(ret); 4521 goto out; 4522 } 4523 4524 /* This is the first bucket that got moved */ 4525 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket); 4526 4527 /* 4528 * If the target bucket was part of the moved buckets, we need to 4529 * update first and target. 4530 */ 4531 if (bucket_blkno(target) >= src_blkno) { 4532 /* Find the block for the new target bucket */ 4533 src_blkno = new_blkno + 4534 (bucket_blkno(target) - src_blkno); 4535 4536 ocfs2_xattr_bucket_relse(first); 4537 ocfs2_xattr_bucket_relse(target); 4538 4539 /* 4540 * These shouldn't fail - the buffers are in the 4541 * journal from ocfs2_cp_xattr_bucket(). 
4542 */ 4543 ret = ocfs2_read_xattr_bucket(first, new_blkno); 4544 if (ret) { 4545 mlog_errno(ret); 4546 goto out; 4547 } 4548 ret = ocfs2_read_xattr_bucket(target, src_blkno); 4549 if (ret) 4550 mlog_errno(ret); 4551 4552 } 4553 4554 out: 4555 return ret; 4556 } 4557 4558 /* 4559 * Find the suitable pos when we divide a bucket into 2. 4560 * We have to make sure the xattrs with the same hash value exist 4561 * in the same bucket. 4562 * 4563 * If this ocfs2_xattr_header covers more than one hash value, find a 4564 * place where the hash value changes. Try to find the most even split. 4565 * The most common case is that all entries have different hash values, 4566 * and the first check we make will find a place to split. 4567 */ 4568 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) 4569 { 4570 struct ocfs2_xattr_entry *entries = xh->xh_entries; 4571 int count = le16_to_cpu(xh->xh_count); 4572 int delta, middle = count / 2; 4573 4574 /* 4575 * We start at the middle. Each step gets farther away in both 4576 * directions. We therefore hit the change in hash value 4577 * nearest to the middle. Note that this loop does not execute for 4578 * count < 2. 4579 */ 4580 for (delta = 0; delta < middle; delta++) { 4581 /* Let's check delta earlier than middle */ 4582 if (cmp_xe(&entries[middle - delta - 1], 4583 &entries[middle - delta])) 4584 return middle - delta; 4585 4586 /* For even counts, don't walk off the end */ 4587 if ((middle + delta + 1) == count) 4588 continue; 4589 4590 /* Now try delta past middle */ 4591 if (cmp_xe(&entries[middle + delta], 4592 &entries[middle + delta + 1])) 4593 return middle + delta + 1; 4594 } 4595 4596 /* Every entry had the same hash */ 4597 return count; 4598 } 4599 4600 /* 4601 * Move some xattrs in old bucket(blk) to new bucket(new_blk). 4602 * first_hash will record the 1st hash of the new bucket. 4603 * 4604 * Normally half of the xattrs will be moved. 
But we have to make 4605 * sure that the xattrs with the same hash value are stored in the 4606 * same bucket. If all the xattrs in this bucket have the same hash 4607 * value, the new bucket will be initialized as an empty one and the 4608 * first_hash will be initialized as (hash_value+1). 4609 */ 4610 static int ocfs2_divide_xattr_bucket(struct inode *inode, 4611 handle_t *handle, 4612 u64 blk, 4613 u64 new_blk, 4614 u32 *first_hash, 4615 int new_bucket_head) 4616 { 4617 int ret, i; 4618 int count, start, len, name_value_len = 0, name_offset = 0; 4619 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4620 struct ocfs2_xattr_header *xh; 4621 struct ocfs2_xattr_entry *xe; 4622 int blocksize = inode->i_sb->s_blocksize; 4623 4624 trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk, 4625 (unsigned long long)new_blk); 4626 4627 s_bucket = ocfs2_xattr_bucket_new(inode); 4628 t_bucket = ocfs2_xattr_bucket_new(inode); 4629 if (!s_bucket || !t_bucket) { 4630 ret = -ENOMEM; 4631 mlog_errno(ret); 4632 goto out; 4633 } 4634 4635 ret = ocfs2_read_xattr_bucket(s_bucket, blk); 4636 if (ret) { 4637 mlog_errno(ret); 4638 goto out; 4639 } 4640 4641 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, 4642 OCFS2_JOURNAL_ACCESS_WRITE); 4643 if (ret) { 4644 mlog_errno(ret); 4645 goto out; 4646 } 4647 4648 /* 4649 * Even if !new_bucket_head, we're overwriting t_bucket. Thus, 4650 * there's no need to read it. 4651 */ 4652 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head); 4653 if (ret) { 4654 mlog_errno(ret); 4655 goto out; 4656 } 4657 4658 /* 4659 * Hey, if we're overwriting t_bucket, what difference does 4660 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the 4661 * same part of ocfs2_cp_xattr_bucket(). 4662 */ 4663 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4664 new_bucket_head ? 
4665 OCFS2_JOURNAL_ACCESS_CREATE : 4666 OCFS2_JOURNAL_ACCESS_WRITE); 4667 if (ret) { 4668 mlog_errno(ret); 4669 goto out; 4670 } 4671 4672 xh = bucket_xh(s_bucket); 4673 count = le16_to_cpu(xh->xh_count); 4674 start = ocfs2_xattr_find_divide_pos(xh); 4675 4676 if (start == count) { 4677 xe = &xh->xh_entries[start-1]; 4678 4679 /* 4680 * initialized a new empty bucket here. 4681 * The hash value is set as one larger than 4682 * that of the last entry in the previous bucket. 4683 */ 4684 for (i = 0; i < t_bucket->bu_blocks; i++) 4685 memset(bucket_block(t_bucket, i), 0, blocksize); 4686 4687 xh = bucket_xh(t_bucket); 4688 xh->xh_free_start = cpu_to_le16(blocksize); 4689 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 4690 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 4691 4692 goto set_num_buckets; 4693 } 4694 4695 /* copy the whole bucket to the new first. */ 4696 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4697 4698 /* update the new bucket. */ 4699 xh = bucket_xh(t_bucket); 4700 4701 /* 4702 * Calculate the total name/value len and xh_free_start for 4703 * the old bucket first. 4704 */ 4705 name_offset = OCFS2_XATTR_BUCKET_SIZE; 4706 name_value_len = 0; 4707 for (i = 0; i < start; i++) { 4708 xe = &xh->xh_entries[i]; 4709 name_value_len += namevalue_size_xe(xe); 4710 if (le16_to_cpu(xe->xe_name_offset) < name_offset) 4711 name_offset = le16_to_cpu(xe->xe_name_offset); 4712 } 4713 4714 /* 4715 * Now begin the modification to the new bucket. 4716 * 4717 * In the new bucket, We just move the xattr entry to the beginning 4718 * and don't touch the name/value. So there will be some holes in the 4719 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is 4720 * called. 
4721 */ 4722 xe = &xh->xh_entries[start]; 4723 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4724 trace_ocfs2_divide_xattr_bucket_move(len, 4725 (int)((char *)xe - (char *)xh), 4726 (int)((char *)xh->xh_entries - (char *)xh)); 4727 memmove((char *)xh->xh_entries, (char *)xe, len); 4728 xe = &xh->xh_entries[count - start]; 4729 len = sizeof(struct ocfs2_xattr_entry) * start; 4730 memset((char *)xe, 0, len); 4731 4732 le16_add_cpu(&xh->xh_count, -start); 4733 le16_add_cpu(&xh->xh_name_value_len, -name_value_len); 4734 4735 /* Calculate xh_free_start for the new bucket. */ 4736 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); 4737 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 4738 xe = &xh->xh_entries[i]; 4739 if (le16_to_cpu(xe->xe_name_offset) < 4740 le16_to_cpu(xh->xh_free_start)) 4741 xh->xh_free_start = xe->xe_name_offset; 4742 } 4743 4744 set_num_buckets: 4745 /* set xh->xh_num_buckets for the new xh. */ 4746 if (new_bucket_head) 4747 xh->xh_num_buckets = cpu_to_le16(1); 4748 else 4749 xh->xh_num_buckets = 0; 4750 4751 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4752 4753 /* store the first_hash of the new bucket. */ 4754 if (first_hash) 4755 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4756 4757 /* 4758 * Now only update the 1st block of the old bucket. If we 4759 * just added a new empty bucket, there is no need to modify 4760 * it. 4761 */ 4762 if (start == count) 4763 goto out; 4764 4765 xh = bucket_xh(s_bucket); 4766 memset(&xh->xh_entries[start], 0, 4767 sizeof(struct ocfs2_xattr_entry) * (count - start)); 4768 xh->xh_count = cpu_to_le16(start); 4769 xh->xh_free_start = cpu_to_le16(name_offset); 4770 xh->xh_name_value_len = cpu_to_le16(name_value_len); 4771 4772 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); 4773 4774 out: 4775 ocfs2_xattr_bucket_free(s_bucket); 4776 ocfs2_xattr_bucket_free(t_bucket); 4777 4778 return ret; 4779 } 4780 4781 /* 4782 * Copy xattr from one bucket to another bucket. 
4783 * 4784 * The caller must make sure that the journal transaction 4785 * has enough space for journaling. 4786 */ 4787 static int ocfs2_cp_xattr_bucket(struct inode *inode, 4788 handle_t *handle, 4789 u64 s_blkno, 4790 u64 t_blkno, 4791 int t_is_new) 4792 { 4793 int ret; 4794 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; 4795 4796 BUG_ON(s_blkno == t_blkno); 4797 4798 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno, 4799 (unsigned long long)t_blkno, 4800 t_is_new); 4801 4802 s_bucket = ocfs2_xattr_bucket_new(inode); 4803 t_bucket = ocfs2_xattr_bucket_new(inode); 4804 if (!s_bucket || !t_bucket) { 4805 ret = -ENOMEM; 4806 mlog_errno(ret); 4807 goto out; 4808 } 4809 4810 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); 4811 if (ret) 4812 goto out; 4813 4814 /* 4815 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4816 * there's no need to read it. 4817 */ 4818 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new); 4819 if (ret) 4820 goto out; 4821 4822 /* 4823 * Hey, if we're overwriting t_bucket, what difference does 4824 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new 4825 * cluster to fill, we came here from 4826 * ocfs2_mv_xattr_buckets(), and it is really new - 4827 * ACCESS_CREATE is required. But we also might have moved data 4828 * out of t_bucket before extending back into it. 4829 * ocfs2_add_new_xattr_bucket() can do this - its call to 4830 * ocfs2_add_new_xattr_cluster() may have created a new extent 4831 * and copied out the end of the old extent. Then it re-extends 4832 * the old extent back to create space for new xattrs. That's 4833 * how we get here, and the bucket isn't really new. 4834 */ 4835 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, 4836 t_is_new ? 
4837 OCFS2_JOURNAL_ACCESS_CREATE : 4838 OCFS2_JOURNAL_ACCESS_WRITE); 4839 if (ret) 4840 goto out; 4841 4842 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); 4843 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); 4844 4845 out: 4846 ocfs2_xattr_bucket_free(t_bucket); 4847 ocfs2_xattr_bucket_free(s_bucket); 4848 4849 return ret; 4850 } 4851 4852 /* 4853 * src_blk points to the start of an existing extent. last_blk points to 4854 * last cluster in that extent. to_blk points to a newly allocated 4855 * extent. We copy the buckets from the cluster at last_blk to the new 4856 * extent. If start_bucket is non-zero, we skip that many buckets before 4857 * we start copying. The new extent's xh_num_buckets gets set to the 4858 * number of buckets we copied. The old extent's xh_num_buckets shrinks 4859 * by the same amount. 4860 */ 4861 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 4862 u64 src_blk, u64 last_blk, u64 to_blk, 4863 unsigned int start_bucket, 4864 u32 *first_hash) 4865 { 4866 int i, ret, credits; 4867 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4868 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4869 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4870 struct ocfs2_xattr_bucket *old_first, *new_first; 4871 4872 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk, 4873 (unsigned long long)to_blk); 4874 4875 BUG_ON(start_bucket >= num_buckets); 4876 if (start_bucket) { 4877 num_buckets -= start_bucket; 4878 last_blk += (start_bucket * blks_per_bucket); 4879 } 4880 4881 /* The first bucket of the original extent */ 4882 old_first = ocfs2_xattr_bucket_new(inode); 4883 /* The first bucket of the new extent */ 4884 new_first = ocfs2_xattr_bucket_new(inode); 4885 if (!old_first || !new_first) { 4886 ret = -ENOMEM; 4887 mlog_errno(ret); 4888 goto out; 4889 } 4890 4891 ret = ocfs2_read_xattr_bucket(old_first, src_blk); 4892 if (ret) { 4893 mlog_errno(ret); 4894 goto out; 4895 } 4896 4897 /* 4898 * We 
need to update the first bucket of the old extent and all 4899 * the buckets going to the new extent. 4900 */ 4901 credits = ((num_buckets + 1) * blks_per_bucket); 4902 ret = ocfs2_extend_trans(handle, credits); 4903 if (ret) { 4904 mlog_errno(ret); 4905 goto out; 4906 } 4907 4908 ret = ocfs2_xattr_bucket_journal_access(handle, old_first, 4909 OCFS2_JOURNAL_ACCESS_WRITE); 4910 if (ret) { 4911 mlog_errno(ret); 4912 goto out; 4913 } 4914 4915 for (i = 0; i < num_buckets; i++) { 4916 ret = ocfs2_cp_xattr_bucket(inode, handle, 4917 last_blk + (i * blks_per_bucket), 4918 to_blk + (i * blks_per_bucket), 4919 1); 4920 if (ret) { 4921 mlog_errno(ret); 4922 goto out; 4923 } 4924 } 4925 4926 /* 4927 * Get the new bucket ready before we dirty anything 4928 * (This actually shouldn't fail, because we already dirtied 4929 * it once in ocfs2_cp_xattr_bucket()). 4930 */ 4931 ret = ocfs2_read_xattr_bucket(new_first, to_blk); 4932 if (ret) { 4933 mlog_errno(ret); 4934 goto out; 4935 } 4936 ret = ocfs2_xattr_bucket_journal_access(handle, new_first, 4937 OCFS2_JOURNAL_ACCESS_WRITE); 4938 if (ret) { 4939 mlog_errno(ret); 4940 goto out; 4941 } 4942 4943 /* Now update the headers */ 4944 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets); 4945 ocfs2_xattr_bucket_journal_dirty(handle, old_first); 4946 4947 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets); 4948 ocfs2_xattr_bucket_journal_dirty(handle, new_first); 4949 4950 if (first_hash) 4951 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash); 4952 4953 out: 4954 ocfs2_xattr_bucket_free(new_first); 4955 ocfs2_xattr_bucket_free(old_first); 4956 return ret; 4957 } 4958 4959 /* 4960 * Move some xattrs in this cluster to the new cluster. 4961 * This function should only be called when bucket size == cluster size. 4962 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 
4963 */ 4964 static int ocfs2_divide_xattr_cluster(struct inode *inode, 4965 handle_t *handle, 4966 u64 prev_blk, 4967 u64 new_blk, 4968 u32 *first_hash) 4969 { 4970 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4971 int ret, credits = 2 * blk_per_bucket; 4972 4973 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4974 4975 ret = ocfs2_extend_trans(handle, credits); 4976 if (ret) { 4977 mlog_errno(ret); 4978 return ret; 4979 } 4980 4981 /* Move half of the xattr in start_blk to the next bucket. */ 4982 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, 4983 new_blk, first_hash, 1); 4984 } 4985 4986 /* 4987 * Move some xattrs from the old cluster to the new one since they are not 4988 * contiguous in ocfs2 xattr tree. 4989 * 4990 * new_blk starts a new separate cluster, and we will move some xattrs from 4991 * prev_blk to it. v_start will be set as the first name hash value in this 4992 * new cluster so that it can be used as e_cpos during tree insertion and 4993 * don't collide with our original b-tree operations. first_bh and header_bh 4994 * will also be updated since they will be used in ocfs2_extend_xattr_bucket 4995 * to extend the insert bucket. 4996 * 4997 * The problem is how much xattr should we move to the new one and when should 4998 * we update first_bh and header_bh? 4999 * 1. If cluster size > bucket size, that means the previous cluster has more 5000 * than 1 bucket, so just move half nums of bucket into the new cluster and 5001 * update the first_bh and header_bh if the insert bucket has been moved 5002 * to the new cluster. 5003 * 2. If cluster_size == bucket_size: 5004 * a) If the previous extent rec has more than one cluster and the insert 5005 * place isn't in the last cluster, copy the entire last cluster to the 5006 * new one. This time, we don't need to upate the first_bh and header_bh 5007 * since they will not be moved into the new cluster. 
5008 * b) Otherwise, move the bottom half of the xattrs in the last cluster into 5009 * the new one. And we set the extend flag to zero if the insert place is 5010 * moved into the new allocated cluster since no extend is needed. 5011 */ 5012 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 5013 handle_t *handle, 5014 struct ocfs2_xattr_bucket *first, 5015 struct ocfs2_xattr_bucket *target, 5016 u64 new_blk, 5017 u32 prev_clusters, 5018 u32 *v_start, 5019 int *extend) 5020 { 5021 int ret; 5022 5023 trace_ocfs2_adjust_xattr_cross_cluster( 5024 (unsigned long long)bucket_blkno(first), 5025 (unsigned long long)new_blk, prev_clusters); 5026 5027 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5028 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5029 handle, 5030 first, target, 5031 new_blk, 5032 prev_clusters, 5033 v_start); 5034 if (ret) 5035 mlog_errno(ret); 5036 } else { 5037 /* The start of the last cluster in the first extent */ 5038 u64 last_blk = bucket_blkno(first) + 5039 ((prev_clusters - 1) * 5040 ocfs2_clusters_to_blocks(inode->i_sb, 1)); 5041 5042 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) { 5043 ret = ocfs2_mv_xattr_buckets(inode, handle, 5044 bucket_blkno(first), 5045 last_blk, new_blk, 0, 5046 v_start); 5047 if (ret) 5048 mlog_errno(ret); 5049 } else { 5050 ret = ocfs2_divide_xattr_cluster(inode, handle, 5051 last_blk, new_blk, 5052 v_start); 5053 if (ret) 5054 mlog_errno(ret); 5055 5056 if ((bucket_blkno(target) == last_blk) && extend) 5057 *extend = 0; 5058 } 5059 } 5060 5061 return ret; 5062 } 5063 5064 /* 5065 * Add a new cluster for xattr storage. 5066 * 5067 * If the new cluster is contiguous with the previous one, it will be 5068 * appended to the same extent record, and num_clusters will be updated. 5069 * If not, we will insert a new extent for it and move some xattrs in 5070 * the last cluster into the new allocated one. 
5071 * We also need to limit the maximum size of a btree leaf, otherwise we'll 5072 * lose the benefits of hashing because we'll have to search large leaves. 5073 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, 5074 * if it's bigger). 5075 * 5076 * first_bh is the first block of the previous extent rec and header_bh 5077 * indicates the bucket we will insert the new xattrs. They will be updated 5078 * when the header_bh is moved into the new cluster. 5079 */ 5080 static int ocfs2_add_new_xattr_cluster(struct inode *inode, 5081 struct buffer_head *root_bh, 5082 struct ocfs2_xattr_bucket *first, 5083 struct ocfs2_xattr_bucket *target, 5084 u32 *num_clusters, 5085 u32 prev_cpos, 5086 int *extend, 5087 struct ocfs2_xattr_set_ctxt *ctxt) 5088 { 5089 int ret; 5090 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 5091 u32 prev_clusters = *num_clusters; 5092 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 5093 u64 block; 5094 handle_t *handle = ctxt->handle; 5095 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5096 struct ocfs2_extent_tree et; 5097 5098 trace_ocfs2_add_new_xattr_cluster_begin( 5099 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5100 (unsigned long long)bucket_blkno(first), 5101 prev_cpos, prev_clusters); 5102 5103 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5104 5105 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5106 OCFS2_JOURNAL_ACCESS_WRITE); 5107 if (ret < 0) { 5108 mlog_errno(ret); 5109 goto leave; 5110 } 5111 5112 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1, 5113 clusters_to_add, &bit_off, &num_bits); 5114 if (ret < 0) { 5115 if (ret != -ENOSPC) 5116 mlog_errno(ret); 5117 goto leave; 5118 } 5119 5120 BUG_ON(num_bits > clusters_to_add); 5121 5122 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5123 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits); 5124 5125 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5126 
(prev_clusters + num_bits) << osb->s_clustersize_bits <= 5127 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 5128 /* 5129 * If this cluster is contiguous with the old one and 5130 * adding this new cluster, we don't surpass the limit of 5131 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be 5132 * initialized and used like other buckets in the previous 5133 * cluster. 5134 * So add it as a contiguous one. The caller will handle 5135 * its init process. 5136 */ 5137 v_start = prev_cpos + prev_clusters; 5138 *num_clusters = prev_clusters + num_bits; 5139 } else { 5140 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5141 handle, 5142 first, 5143 target, 5144 block, 5145 prev_clusters, 5146 &v_start, 5147 extend); 5148 if (ret) { 5149 mlog_errno(ret); 5150 goto leave; 5151 } 5152 } 5153 5154 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block, 5155 v_start, num_bits); 5156 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5157 num_bits, 0, ctxt->meta_ac); 5158 if (ret < 0) { 5159 mlog_errno(ret); 5160 goto leave; 5161 } 5162 5163 ocfs2_journal_dirty(handle, root_bh); 5164 5165 leave: 5166 return ret; 5167 } 5168 5169 /* 5170 * We are given an extent. 'first' is the bucket at the very front of 5171 * the extent. The extent has space for an additional bucket past 5172 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number 5173 * of the target bucket. We wish to shift every bucket past the target 5174 * down one, filling in that additional space. When we get back to the 5175 * target, we split the target between itself and the now-empty bucket 5176 * at target+1 (aka, target_blkno + blks_per_bucket). 
5177 */ 5178 static int ocfs2_extend_xattr_bucket(struct inode *inode, 5179 handle_t *handle, 5180 struct ocfs2_xattr_bucket *first, 5181 u64 target_blk, 5182 u32 num_clusters) 5183 { 5184 int ret, credits; 5185 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5186 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5187 u64 end_blk; 5188 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5189 5190 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk, 5191 (unsigned long long)bucket_blkno(first), 5192 num_clusters, new_bucket); 5193 5194 /* The extent must have room for an additional bucket */ 5195 BUG_ON(new_bucket >= 5196 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb))); 5197 5198 /* end_blk points to the last existing bucket */ 5199 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket); 5200 5201 /* 5202 * end_blk is the start of the last existing bucket. 5203 * Thus, (end_blk - target_blk) covers the target bucket and 5204 * every bucket after it up to, but not including, the last 5205 * existing bucket. Then we add the last existing bucket, the 5206 * new bucket, and the first bucket (3 * blk_per_bucket). 5207 */ 5208 credits = (end_blk - target_blk) + (3 * blk_per_bucket); 5209 ret = ocfs2_extend_trans(handle, credits); 5210 if (ret) { 5211 mlog_errno(ret); 5212 goto out; 5213 } 5214 5215 ret = ocfs2_xattr_bucket_journal_access(handle, first, 5216 OCFS2_JOURNAL_ACCESS_WRITE); 5217 if (ret) { 5218 mlog_errno(ret); 5219 goto out; 5220 } 5221 5222 while (end_blk != target_blk) { 5223 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 5224 end_blk + blk_per_bucket, 0); 5225 if (ret) 5226 goto out; 5227 end_blk -= blk_per_bucket; 5228 } 5229 5230 /* Move half of the xattr in target_blkno to the next bucket. 
*/ 5231 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk, 5232 target_blk + blk_per_bucket, NULL, 0); 5233 5234 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1); 5235 ocfs2_xattr_bucket_journal_dirty(handle, first); 5236 5237 out: 5238 return ret; 5239 } 5240 5241 /* 5242 * Add new xattr bucket in an extent record and adjust the buckets 5243 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the 5244 * bucket we want to insert into. 5245 * 5246 * In the easy case, we will move all the buckets after target down by 5247 * one. Half of target's xattrs will be moved to the next bucket. 5248 * 5249 * If current cluster is full, we'll allocate a new one. This may not 5250 * be contiguous. The underlying calls will make sure that there is 5251 * space for the insert, shifting buckets around if necessary. 5252 * 'target' may be moved by those calls. 5253 */ 5254 static int ocfs2_add_new_xattr_bucket(struct inode *inode, 5255 struct buffer_head *xb_bh, 5256 struct ocfs2_xattr_bucket *target, 5257 struct ocfs2_xattr_set_ctxt *ctxt) 5258 { 5259 struct ocfs2_xattr_block *xb = 5260 (struct ocfs2_xattr_block *)xb_bh->b_data; 5261 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 5262 struct ocfs2_extent_list *el = &xb_root->xt_list; 5263 u32 name_hash = 5264 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash); 5265 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5266 int ret, num_buckets, extend = 1; 5267 u64 p_blkno; 5268 u32 e_cpos, num_clusters; 5269 /* The bucket at the front of the extent */ 5270 struct ocfs2_xattr_bucket *first; 5271 5272 trace_ocfs2_add_new_xattr_bucket( 5273 (unsigned long long)bucket_blkno(target)); 5274 5275 /* The first bucket of the original extent */ 5276 first = ocfs2_xattr_bucket_new(inode); 5277 if (!first) { 5278 ret = -ENOMEM; 5279 mlog_errno(ret); 5280 goto out; 5281 } 5282 5283 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 5284 &num_clusters, el); 5285 if (ret) { 5286 mlog_errno(ret); 
5287 goto out; 5288 } 5289 5290 ret = ocfs2_read_xattr_bucket(first, p_blkno); 5291 if (ret) { 5292 mlog_errno(ret); 5293 goto out; 5294 } 5295 5296 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 5297 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) { 5298 /* 5299 * This can move first+target if the target bucket moves 5300 * to the new extent. 5301 */ 5302 ret = ocfs2_add_new_xattr_cluster(inode, 5303 xb_bh, 5304 first, 5305 target, 5306 &num_clusters, 5307 e_cpos, 5308 &extend, 5309 ctxt); 5310 if (ret) { 5311 mlog_errno(ret); 5312 goto out; 5313 } 5314 } 5315 5316 if (extend) { 5317 ret = ocfs2_extend_xattr_bucket(inode, 5318 ctxt->handle, 5319 first, 5320 bucket_blkno(target), 5321 num_clusters); 5322 if (ret) 5323 mlog_errno(ret); 5324 } 5325 5326 out: 5327 ocfs2_xattr_bucket_free(first); 5328 5329 return ret; 5330 } 5331 5332 /* 5333 * Truncate the specified xe_off entry in xattr bucket. 5334 * bucket is indicated by header_bh and len is the new length. 5335 * Both the ocfs2_xattr_value_root and the entry will be updated here. 5336 * 5337 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 5338 */ 5339 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 5340 struct ocfs2_xattr_bucket *bucket, 5341 int xe_off, 5342 int len, 5343 struct ocfs2_xattr_set_ctxt *ctxt) 5344 { 5345 int ret, offset; 5346 u64 value_blk; 5347 struct ocfs2_xattr_entry *xe; 5348 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5349 size_t blocksize = inode->i_sb->s_blocksize; 5350 struct ocfs2_xattr_value_buf vb = { 5351 .vb_access = ocfs2_journal_access, 5352 }; 5353 5354 xe = &xh->xh_entries[xe_off]; 5355 5356 BUG_ON(!xe || ocfs2_xattr_is_local(xe)); 5357 5358 offset = le16_to_cpu(xe->xe_name_offset) + 5359 OCFS2_XATTR_SIZE(xe->xe_name_len); 5360 5361 value_blk = offset / blocksize; 5362 5363 /* We don't allow ocfs2_xattr_value to be stored in different block. 
*/ 5364 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 5365 5366 vb.vb_bh = bucket->bu_bhs[value_blk]; 5367 BUG_ON(!vb.vb_bh); 5368 5369 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5370 (vb.vb_bh->b_data + offset % blocksize); 5371 5372 /* 5373 * From here on out we have to dirty the bucket. The generic 5374 * value calls only modify one of the bucket's bhs, but we need 5375 * to send the bucket at once. So if they error, they *could* have 5376 * modified something. We have to assume they did, and dirty 5377 * the whole bucket. This leaves us in a consistent state. 5378 */ 5379 trace_ocfs2_xattr_bucket_value_truncate( 5380 (unsigned long long)bucket_blkno(bucket), xe_off, len); 5381 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5382 if (ret) { 5383 mlog_errno(ret); 5384 goto out; 5385 } 5386 5387 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket, 5388 OCFS2_JOURNAL_ACCESS_WRITE); 5389 if (ret) { 5390 mlog_errno(ret); 5391 goto out; 5392 } 5393 5394 xe->xe_value_size = cpu_to_le64(len); 5395 5396 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket); 5397 5398 out: 5399 return ret; 5400 } 5401 5402 static int ocfs2_rm_xattr_cluster(struct inode *inode, 5403 struct buffer_head *root_bh, 5404 u64 blkno, 5405 u32 cpos, 5406 u32 len, 5407 void *para) 5408 { 5409 int ret; 5410 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5411 struct inode *tl_inode = osb->osb_tl_inode; 5412 handle_t *handle; 5413 struct ocfs2_xattr_block *xb = 5414 (struct ocfs2_xattr_block *)root_bh->b_data; 5415 struct ocfs2_alloc_context *meta_ac = NULL; 5416 struct ocfs2_cached_dealloc_ctxt dealloc; 5417 struct ocfs2_extent_tree et; 5418 5419 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, 5420 ocfs2_delete_xattr_in_bucket, para); 5421 if (ret) { 5422 mlog_errno(ret); 5423 return ret; 5424 } 5425 5426 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5427 5428 ocfs2_init_dealloc_ctxt(&dealloc); 5429 5430 
trace_ocfs2_rm_xattr_cluster( 5431 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5432 (unsigned long long)blkno, cpos, len); 5433 5434 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5435 len); 5436 5437 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5438 if (ret) { 5439 mlog_errno(ret); 5440 return ret; 5441 } 5442 5443 mutex_lock(&tl_inode->i_mutex); 5444 5445 if (ocfs2_truncate_log_needs_flush(osb)) { 5446 ret = __ocfs2_flush_truncate_log(osb); 5447 if (ret < 0) { 5448 mlog_errno(ret); 5449 goto out; 5450 } 5451 } 5452 5453 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5454 if (IS_ERR(handle)) { 5455 ret = -ENOMEM; 5456 mlog_errno(ret); 5457 goto out; 5458 } 5459 5460 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, 5461 OCFS2_JOURNAL_ACCESS_WRITE); 5462 if (ret) { 5463 mlog_errno(ret); 5464 goto out_commit; 5465 } 5466 5467 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, 5468 &dealloc); 5469 if (ret) { 5470 mlog_errno(ret); 5471 goto out_commit; 5472 } 5473 5474 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5475 ocfs2_journal_dirty(handle, root_bh); 5476 5477 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5478 if (ret) 5479 mlog_errno(ret); 5480 ocfs2_update_inode_fsync_trans(handle, inode, 0); 5481 5482 out_commit: 5483 ocfs2_commit_trans(osb, handle); 5484 out: 5485 ocfs2_schedule_truncate_log_flush(osb, 1); 5486 5487 mutex_unlock(&tl_inode->i_mutex); 5488 5489 if (meta_ac) 5490 ocfs2_free_alloc_context(meta_ac); 5491 5492 ocfs2_run_deallocs(osb, &dealloc); 5493 5494 return ret; 5495 } 5496 5497 /* 5498 * check whether the xattr bucket is filled up with the same hash value. 5499 * If we want to insert the xattr with the same hash, return -ENOSPC. 5500 * If we want to insert a xattr with different hash value, go ahead 5501 * and ocfs2_divide_xattr_bucket will handle this. 
5502 */ 5503 static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 5504 struct ocfs2_xattr_bucket *bucket, 5505 const char *name) 5506 { 5507 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5508 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5509 5510 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5511 return 0; 5512 5513 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 5514 xh->xh_entries[0].xe_name_hash) { 5515 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5516 "hash = %u\n", 5517 (unsigned long long)bucket_blkno(bucket), 5518 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5519 return -ENOSPC; 5520 } 5521 5522 return 0; 5523 } 5524 5525 /* 5526 * Try to set the entry in the current bucket. If we fail, the caller 5527 * will handle getting us another bucket. 5528 */ 5529 static int ocfs2_xattr_set_entry_bucket(struct inode *inode, 5530 struct ocfs2_xattr_info *xi, 5531 struct ocfs2_xattr_search *xs, 5532 struct ocfs2_xattr_set_ctxt *ctxt) 5533 { 5534 int ret; 5535 struct ocfs2_xa_loc loc; 5536 5537 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name); 5538 5539 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5540 xs->not_found ? NULL : xs->here); 5541 ret = ocfs2_xa_set(&loc, xi, ctxt); 5542 if (!ret) { 5543 xs->here = loc.xl_entry; 5544 goto out; 5545 } 5546 if (ret != -ENOSPC) { 5547 mlog_errno(ret); 5548 goto out; 5549 } 5550 5551 /* Ok, we need space. Let's try defragmenting the bucket. 
*/ 5552 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, 5553 xs->bucket); 5554 if (ret) { 5555 mlog_errno(ret); 5556 goto out; 5557 } 5558 5559 ret = ocfs2_xa_set(&loc, xi, ctxt); 5560 if (!ret) { 5561 xs->here = loc.xl_entry; 5562 goto out; 5563 } 5564 if (ret != -ENOSPC) 5565 mlog_errno(ret); 5566 5567 5568 out: 5569 return ret; 5570 } 5571 5572 static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5573 struct ocfs2_xattr_info *xi, 5574 struct ocfs2_xattr_search *xs, 5575 struct ocfs2_xattr_set_ctxt *ctxt) 5576 { 5577 int ret; 5578 5579 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name); 5580 5581 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5582 if (!ret) 5583 goto out; 5584 if (ret != -ENOSPC) { 5585 mlog_errno(ret); 5586 goto out; 5587 } 5588 5589 /* Ack, need more space. Let's try to get another bucket! */ 5590 5591 /* 5592 * We do not allow for overlapping ranges between buckets. And 5593 * the maximum number of collisions we will allow for then is 5594 * one bucket's worth, so check it here whether we need to 5595 * add a new bucket for the insert. 5596 */ 5597 ret = ocfs2_check_xattr_bucket_collision(inode, 5598 xs->bucket, 5599 xi->xi_name); 5600 if (ret) { 5601 mlog_errno(ret); 5602 goto out; 5603 } 5604 5605 ret = ocfs2_add_new_xattr_bucket(inode, 5606 xs->xattr_bh, 5607 xs->bucket, 5608 ctxt); 5609 if (ret) { 5610 mlog_errno(ret); 5611 goto out; 5612 } 5613 5614 /* 5615 * ocfs2_add_new_xattr_bucket() will have updated 5616 * xs->bucket if it moved, but it will not have updated 5617 * any of the other search fields. Thus, we drop it and 5618 * re-search. Everything should be cached, so it'll be 5619 * quick. 
5620 */ 5621 ocfs2_xattr_bucket_relse(xs->bucket); 5622 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5623 xi->xi_name_index, 5624 xi->xi_name, xs); 5625 if (ret && ret != -ENODATA) 5626 goto out; 5627 xs->not_found = ret; 5628 5629 /* Ok, we have a new bucket, let's try again */ 5630 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5631 if (ret && (ret != -ENOSPC)) 5632 mlog_errno(ret); 5633 5634 out: 5635 return ret; 5636 } 5637 5638 static int ocfs2_delete_xattr_in_bucket(struct inode *inode, 5639 struct ocfs2_xattr_bucket *bucket, 5640 void *para) 5641 { 5642 int ret = 0, ref_credits; 5643 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5644 u16 i; 5645 struct ocfs2_xattr_entry *xe; 5646 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5647 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5648 int credits = ocfs2_remove_extent_credits(osb->sb) + 5649 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5650 struct ocfs2_xattr_value_root *xv; 5651 struct ocfs2_rm_xattr_bucket_para *args = 5652 (struct ocfs2_rm_xattr_bucket_para *)para; 5653 5654 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5655 5656 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5657 xe = &xh->xh_entries[i]; 5658 if (ocfs2_xattr_is_local(xe)) 5659 continue; 5660 5661 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5662 i, &xv, NULL); 5663 if (ret) { 5664 mlog_errno(ret); 5665 break; 5666 } 5667 5668 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5669 args->ref_ci, 5670 args->ref_root_bh, 5671 &ctxt.meta_ac, 5672 &ref_credits); 5673 5674 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); 5675 if (IS_ERR(ctxt.handle)) { 5676 ret = PTR_ERR(ctxt.handle); 5677 mlog_errno(ret); 5678 break; 5679 } 5680 5681 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket, 5682 i, 0, &ctxt); 5683 5684 ocfs2_commit_trans(osb, ctxt.handle); 5685 if (ctxt.meta_ac) { 5686 ocfs2_free_alloc_context(ctxt.meta_ac); 5687 ctxt.meta_ac = NULL; 5688 } 5689 if (ret) { 5690 mlog_errno(ret); 
5691 break; 5692 } 5693 } 5694 5695 if (ctxt.meta_ac) 5696 ocfs2_free_alloc_context(ctxt.meta_ac); 5697 ocfs2_schedule_truncate_log_flush(osb, 1); 5698 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5699 return ret; 5700 } 5701 5702 /* 5703 * Whenever we modify a xattr value root in the bucket(e.g, CoW 5704 * or change the extent record flag), we need to recalculate 5705 * the metaecc for the whole bucket. So it is done here. 5706 * 5707 * Note: 5708 * We have to give the extra credits for the caller. 5709 */ 5710 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, 5711 handle_t *handle, 5712 void *para) 5713 { 5714 int ret; 5715 struct ocfs2_xattr_bucket *bucket = 5716 (struct ocfs2_xattr_bucket *)para; 5717 5718 ret = ocfs2_xattr_bucket_journal_access(handle, bucket, 5719 OCFS2_JOURNAL_ACCESS_WRITE); 5720 if (ret) { 5721 mlog_errno(ret); 5722 return ret; 5723 } 5724 5725 ocfs2_xattr_bucket_journal_dirty(handle, bucket); 5726 5727 return 0; 5728 } 5729 5730 /* 5731 * Special action we need if the xattr value is refcounted. 5732 * 5733 * 1. If the xattr is refcounted, lock the tree. 5734 * 2. CoW the xattr if we are setting the new value and the value 5735 * will be stored outside. 5736 * 3. In other case, decrease_refcount will work for us, so just 5737 * lock the refcount tree, calculate the meta and credits is OK. 5738 * 5739 * We have to do CoW before ocfs2_init_xattr_set_ctxt since 5740 * currently CoW is a completed transaction, while this function 5741 * will also lock the allocators and let us deadlock. So we will 5742 * CoW the whole xattr value. 
5743 */ 5744 static int ocfs2_prepare_refcount_xattr(struct inode *inode, 5745 struct ocfs2_dinode *di, 5746 struct ocfs2_xattr_info *xi, 5747 struct ocfs2_xattr_search *xis, 5748 struct ocfs2_xattr_search *xbs, 5749 struct ocfs2_refcount_tree **ref_tree, 5750 int *meta_add, 5751 int *credits) 5752 { 5753 int ret = 0; 5754 struct ocfs2_xattr_block *xb; 5755 struct ocfs2_xattr_entry *xe; 5756 char *base; 5757 u32 p_cluster, num_clusters; 5758 unsigned int ext_flags; 5759 int name_offset, name_len; 5760 struct ocfs2_xattr_value_buf vb; 5761 struct ocfs2_xattr_bucket *bucket = NULL; 5762 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5763 struct ocfs2_post_refcount refcount; 5764 struct ocfs2_post_refcount *p = NULL; 5765 struct buffer_head *ref_root_bh = NULL; 5766 5767 if (!xis->not_found) { 5768 xe = xis->here; 5769 name_offset = le16_to_cpu(xe->xe_name_offset); 5770 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5771 base = xis->base; 5772 vb.vb_bh = xis->inode_bh; 5773 vb.vb_access = ocfs2_journal_access_di; 5774 } else { 5775 int i, block_off = 0; 5776 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; 5777 xe = xbs->here; 5778 name_offset = le16_to_cpu(xe->xe_name_offset); 5779 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); 5780 i = xbs->here - xbs->header->xh_entries; 5781 5782 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 5783 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, 5784 bucket_xh(xbs->bucket), 5785 i, &block_off, 5786 &name_offset); 5787 if (ret) { 5788 mlog_errno(ret); 5789 goto out; 5790 } 5791 base = bucket_block(xbs->bucket, block_off); 5792 vb.vb_bh = xbs->bucket->bu_bhs[block_off]; 5793 vb.vb_access = ocfs2_journal_access; 5794 5795 if (ocfs2_meta_ecc(osb)) { 5796 /*create parameters for ocfs2_post_refcount. 
*/ 5797 bucket = xbs->bucket; 5798 refcount.credits = bucket->bu_blocks; 5799 refcount.para = bucket; 5800 refcount.func = 5801 ocfs2_xattr_bucket_post_refcount; 5802 p = &refcount; 5803 } 5804 } else { 5805 base = xbs->base; 5806 vb.vb_bh = xbs->xattr_bh; 5807 vb.vb_access = ocfs2_journal_access_xb; 5808 } 5809 } 5810 5811 if (ocfs2_xattr_is_local(xe)) 5812 goto out; 5813 5814 vb.vb_xv = (struct ocfs2_xattr_value_root *) 5815 (base + name_offset + name_len); 5816 5817 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, 5818 &num_clusters, &vb.vb_xv->xr_list, 5819 &ext_flags); 5820 if (ret) { 5821 mlog_errno(ret); 5822 goto out; 5823 } 5824 5825 /* 5826 * We just need to check the 1st extent record, since we always 5827 * CoW the whole xattr. So there shouldn't be a xattr with 5828 * some REFCOUNT extent recs after the 1st one. 5829 */ 5830 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 5831 goto out; 5832 5833 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 5834 1, ref_tree, &ref_root_bh); 5835 if (ret) { 5836 mlog_errno(ret); 5837 goto out; 5838 } 5839 5840 /* 5841 * If we are deleting the xattr or the new size will be stored inside, 5842 * cool, leave it there, the xattr truncate process will remove them 5843 * for us(it still needs the refcount tree lock and the meta, credits). 5844 * And the worse case is that every cluster truncate will split the 5845 * refcount tree, and make the original extent become 3. So we will need 5846 * 2 * cluster more extent recs at most. 
5847 */ 5848 if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { 5849 5850 ret = ocfs2_refcounted_xattr_delete_need(inode, 5851 &(*ref_tree)->rf_ci, 5852 ref_root_bh, vb.vb_xv, 5853 meta_add, credits); 5854 if (ret) 5855 mlog_errno(ret); 5856 goto out; 5857 } 5858 5859 ret = ocfs2_refcount_cow_xattr(inode, di, &vb, 5860 *ref_tree, ref_root_bh, 0, 5861 le32_to_cpu(vb.vb_xv->xr_clusters), p); 5862 if (ret) 5863 mlog_errno(ret); 5864 5865 out: 5866 brelse(ref_root_bh); 5867 return ret; 5868 } 5869 5870 /* 5871 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. 5872 * The physical clusters will be added to refcount tree. 5873 */ 5874 static int ocfs2_xattr_value_attach_refcount(struct inode *inode, 5875 struct ocfs2_xattr_value_root *xv, 5876 struct ocfs2_extent_tree *value_et, 5877 struct ocfs2_caching_info *ref_ci, 5878 struct buffer_head *ref_root_bh, 5879 struct ocfs2_cached_dealloc_ctxt *dealloc, 5880 struct ocfs2_post_refcount *refcount) 5881 { 5882 int ret = 0; 5883 u32 clusters = le32_to_cpu(xv->xr_clusters); 5884 u32 cpos, p_cluster, num_clusters; 5885 struct ocfs2_extent_list *el = &xv->xr_list; 5886 unsigned int ext_flags; 5887 5888 cpos = 0; 5889 while (cpos < clusters) { 5890 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5891 &num_clusters, el, &ext_flags); 5892 if (ret) { 5893 mlog_errno(ret); 5894 break; 5895 } 5896 5897 cpos += num_clusters; 5898 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5899 continue; 5900 5901 BUG_ON(!p_cluster); 5902 5903 ret = ocfs2_add_refcount_flag(inode, value_et, 5904 ref_ci, ref_root_bh, 5905 cpos - num_clusters, 5906 p_cluster, num_clusters, 5907 dealloc, refcount); 5908 if (ret) { 5909 mlog_errno(ret); 5910 break; 5911 } 5912 } 5913 5914 return ret; 5915 } 5916 5917 /* 5918 * Given a normal ocfs2_xattr_header, refcount all the entries which 5919 * have value stored outside. 5920 * Used for xattrs stored in inode and ocfs2_xattr_block. 
5921 */ 5922 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, 5923 struct ocfs2_xattr_value_buf *vb, 5924 struct ocfs2_xattr_header *header, 5925 struct ocfs2_caching_info *ref_ci, 5926 struct buffer_head *ref_root_bh, 5927 struct ocfs2_cached_dealloc_ctxt *dealloc) 5928 { 5929 5930 struct ocfs2_xattr_entry *xe; 5931 struct ocfs2_xattr_value_root *xv; 5932 struct ocfs2_extent_tree et; 5933 int i, ret = 0; 5934 5935 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 5936 xe = &header->xh_entries[i]; 5937 5938 if (ocfs2_xattr_is_local(xe)) 5939 continue; 5940 5941 xv = (struct ocfs2_xattr_value_root *)((void *)header + 5942 le16_to_cpu(xe->xe_name_offset) + 5943 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5944 5945 vb->vb_xv = xv; 5946 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 5947 5948 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, 5949 ref_ci, ref_root_bh, 5950 dealloc, NULL); 5951 if (ret) { 5952 mlog_errno(ret); 5953 break; 5954 } 5955 } 5956 5957 return ret; 5958 } 5959 5960 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, 5961 struct buffer_head *fe_bh, 5962 struct ocfs2_caching_info *ref_ci, 5963 struct buffer_head *ref_root_bh, 5964 struct ocfs2_cached_dealloc_ctxt *dealloc) 5965 { 5966 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 5967 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) 5968 (fe_bh->b_data + inode->i_sb->s_blocksize - 5969 le16_to_cpu(di->i_xattr_inline_size)); 5970 struct ocfs2_xattr_value_buf vb = { 5971 .vb_bh = fe_bh, 5972 .vb_access = ocfs2_journal_access_di, 5973 }; 5974 5975 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 5976 ref_ci, ref_root_bh, dealloc); 5977 } 5978 5979 struct ocfs2_xattr_tree_value_refcount_para { 5980 struct ocfs2_caching_info *ref_ci; 5981 struct buffer_head *ref_root_bh; 5982 struct ocfs2_cached_dealloc_ctxt *dealloc; 5983 }; 5984 5985 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, 
5986 struct ocfs2_xattr_bucket *bucket, 5987 int offset, 5988 struct ocfs2_xattr_value_root **xv, 5989 struct buffer_head **bh) 5990 { 5991 int ret, block_off, name_offset; 5992 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5993 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 5994 void *base; 5995 5996 ret = ocfs2_xattr_bucket_get_name_value(sb, 5997 bucket_xh(bucket), 5998 offset, 5999 &block_off, 6000 &name_offset); 6001 if (ret) { 6002 mlog_errno(ret); 6003 goto out; 6004 } 6005 6006 base = bucket_block(bucket, block_off); 6007 6008 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + 6009 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6010 6011 if (bh) 6012 *bh = bucket->bu_bhs[block_off]; 6013 out: 6014 return ret; 6015 } 6016 6017 /* 6018 * For a given xattr bucket, refcount all the entries which 6019 * have value stored outside. 6020 */ 6021 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, 6022 struct ocfs2_xattr_bucket *bucket, 6023 void *para) 6024 { 6025 int i, ret = 0; 6026 struct ocfs2_extent_tree et; 6027 struct ocfs2_xattr_tree_value_refcount_para *ref = 6028 (struct ocfs2_xattr_tree_value_refcount_para *)para; 6029 struct ocfs2_xattr_header *xh = 6030 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6031 struct ocfs2_xattr_entry *xe; 6032 struct ocfs2_xattr_value_buf vb = { 6033 .vb_access = ocfs2_journal_access, 6034 }; 6035 struct ocfs2_post_refcount refcount = { 6036 .credits = bucket->bu_blocks, 6037 .para = bucket, 6038 .func = ocfs2_xattr_bucket_post_refcount, 6039 }; 6040 struct ocfs2_post_refcount *p = NULL; 6041 6042 /* We only need post_refcount if we support metaecc. 
*/ 6043 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6044 p = &refcount; 6045 6046 trace_ocfs2_xattr_bucket_value_refcount( 6047 (unsigned long long)bucket_blkno(bucket), 6048 le16_to_cpu(xh->xh_count)); 6049 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6050 xe = &xh->xh_entries[i]; 6051 6052 if (ocfs2_xattr_is_local(xe)) 6053 continue; 6054 6055 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, 6056 &vb.vb_xv, &vb.vb_bh); 6057 if (ret) { 6058 mlog_errno(ret); 6059 break; 6060 } 6061 6062 ocfs2_init_xattr_value_extent_tree(&et, 6063 INODE_CACHE(inode), &vb); 6064 6065 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, 6066 &et, ref->ref_ci, 6067 ref->ref_root_bh, 6068 ref->dealloc, p); 6069 if (ret) { 6070 mlog_errno(ret); 6071 break; 6072 } 6073 } 6074 6075 return ret; 6076 6077 } 6078 6079 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, 6080 struct buffer_head *root_bh, 6081 u64 blkno, u32 cpos, u32 len, void *para) 6082 { 6083 return ocfs2_iterate_xattr_buckets(inode, blkno, len, 6084 ocfs2_xattr_bucket_value_refcount, 6085 para); 6086 } 6087 6088 static int ocfs2_xattr_block_attach_refcount(struct inode *inode, 6089 struct buffer_head *blk_bh, 6090 struct ocfs2_caching_info *ref_ci, 6091 struct buffer_head *ref_root_bh, 6092 struct ocfs2_cached_dealloc_ctxt *dealloc) 6093 { 6094 int ret = 0; 6095 struct ocfs2_xattr_block *xb = 6096 (struct ocfs2_xattr_block *)blk_bh->b_data; 6097 6098 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 6099 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 6100 struct ocfs2_xattr_value_buf vb = { 6101 .vb_bh = blk_bh, 6102 .vb_access = ocfs2_journal_access_xb, 6103 }; 6104 6105 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, 6106 ref_ci, ref_root_bh, 6107 dealloc); 6108 } else { 6109 struct ocfs2_xattr_tree_value_refcount_para para = { 6110 .ref_ci = ref_ci, 6111 .ref_root_bh = ref_root_bh, 6112 .dealloc = dealloc, 6113 }; 6114 6115 ret = 
ocfs2_iterate_xattr_index_block(inode, blk_bh, 6116 ocfs2_refcount_xattr_tree_rec, 6117 ¶); 6118 } 6119 6120 return ret; 6121 } 6122 6123 int ocfs2_xattr_attach_refcount_tree(struct inode *inode, 6124 struct buffer_head *fe_bh, 6125 struct ocfs2_caching_info *ref_ci, 6126 struct buffer_head *ref_root_bh, 6127 struct ocfs2_cached_dealloc_ctxt *dealloc) 6128 { 6129 int ret = 0; 6130 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6131 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6132 struct buffer_head *blk_bh = NULL; 6133 6134 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 6135 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, 6136 ref_ci, ref_root_bh, 6137 dealloc); 6138 if (ret) { 6139 mlog_errno(ret); 6140 goto out; 6141 } 6142 } 6143 6144 if (!di->i_xattr_loc) 6145 goto out; 6146 6147 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), 6148 &blk_bh); 6149 if (ret < 0) { 6150 mlog_errno(ret); 6151 goto out; 6152 } 6153 6154 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, 6155 ref_root_bh, dealloc); 6156 if (ret) 6157 mlog_errno(ret); 6158 6159 brelse(blk_bh); 6160 out: 6161 6162 return ret; 6163 } 6164 6165 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); 6166 /* 6167 * Store the information we need in xattr reflink. 6168 * old_bh and new_bh are inode bh for the old and new inode. 6169 */ 6170 struct ocfs2_xattr_reflink { 6171 struct inode *old_inode; 6172 struct inode *new_inode; 6173 struct buffer_head *old_bh; 6174 struct buffer_head *new_bh; 6175 struct ocfs2_caching_info *ref_ci; 6176 struct buffer_head *ref_root_bh; 6177 struct ocfs2_cached_dealloc_ctxt *dealloc; 6178 should_xattr_reflinked *xattr_reflinked; 6179 }; 6180 6181 /* 6182 * Given a xattr header and xe offset, 6183 * return the proper xv and the corresponding bh. 6184 * xattr in inode, block and xattr tree have different implementaions. 
6185 */ 6186 typedef int (get_xattr_value_root)(struct super_block *sb, 6187 struct buffer_head *bh, 6188 struct ocfs2_xattr_header *xh, 6189 int offset, 6190 struct ocfs2_xattr_value_root **xv, 6191 struct buffer_head **ret_bh, 6192 void *para); 6193 6194 /* 6195 * Calculate all the xattr value root metadata stored in this xattr header and 6196 * credits we need if we create them from the scratch. 6197 * We use get_xattr_value_root so that all types of xattr container can use it. 6198 */ 6199 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, 6200 struct buffer_head *bh, 6201 struct ocfs2_xattr_header *xh, 6202 int *metas, int *credits, 6203 int *num_recs, 6204 get_xattr_value_root *func, 6205 void *para) 6206 { 6207 int i, ret = 0; 6208 struct ocfs2_xattr_value_root *xv; 6209 struct ocfs2_xattr_entry *xe; 6210 6211 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6212 xe = &xh->xh_entries[i]; 6213 if (ocfs2_xattr_is_local(xe)) 6214 continue; 6215 6216 ret = func(sb, bh, xh, i, &xv, NULL, para); 6217 if (ret) { 6218 mlog_errno(ret); 6219 break; 6220 } 6221 6222 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * 6223 le16_to_cpu(xv->xr_list.l_next_free_rec); 6224 6225 *credits += ocfs2_calc_extend_credits(sb, 6226 &def_xv.xv.xr_list); 6227 6228 /* 6229 * If the value is a tree with depth > 1, We don't go deep 6230 * to the extent block, so just calculate a maximum record num. 6231 */ 6232 if (!xv->xr_list.l_tree_depth) 6233 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); 6234 else 6235 *num_recs += ocfs2_clusters_for_bytes(sb, 6236 XATTR_SIZE_MAX); 6237 } 6238 6239 return ret; 6240 } 6241 6242 /* Used by xattr inode and block to return the right xv and buffer_head. 
*/ 6243 static int ocfs2_get_xattr_value_root(struct super_block *sb, 6244 struct buffer_head *bh, 6245 struct ocfs2_xattr_header *xh, 6246 int offset, 6247 struct ocfs2_xattr_value_root **xv, 6248 struct buffer_head **ret_bh, 6249 void *para) 6250 { 6251 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; 6252 6253 *xv = (struct ocfs2_xattr_value_root *)((void *)xh + 6254 le16_to_cpu(xe->xe_name_offset) + 6255 OCFS2_XATTR_SIZE(xe->xe_name_len)); 6256 6257 if (ret_bh) 6258 *ret_bh = bh; 6259 6260 return 0; 6261 } 6262 6263 /* 6264 * Lock the meta_ac and caculate how much credits we need for reflink xattrs. 6265 * It is only used for inline xattr and xattr block. 6266 */ 6267 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, 6268 struct ocfs2_xattr_header *xh, 6269 struct buffer_head *ref_root_bh, 6270 int *credits, 6271 struct ocfs2_alloc_context **meta_ac) 6272 { 6273 int ret, meta_add = 0, num_recs = 0; 6274 struct ocfs2_refcount_block *rb = 6275 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 6276 6277 *credits = 0; 6278 6279 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh, 6280 &meta_add, credits, &num_recs, 6281 ocfs2_get_xattr_value_root, 6282 NULL); 6283 if (ret) { 6284 mlog_errno(ret); 6285 goto out; 6286 } 6287 6288 /* 6289 * We need to add/modify num_recs in refcount tree, so just calculate 6290 * an approximate number we need for refcount tree change. 6291 * Sometimes we need to split the tree, and after split, half recs 6292 * will be moved to the new block, and a new block can only provide 6293 * half number of recs. So we multiple new blocks by 2. 
6294 */ 6295 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6296 meta_add += num_recs; 6297 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6298 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6299 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6300 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6301 else 6302 *credits += 1; 6303 6304 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); 6305 if (ret) 6306 mlog_errno(ret); 6307 6308 out: 6309 return ret; 6310 } 6311 6312 /* 6313 * Given a xattr header, reflink all the xattrs in this container. 6314 * It can be used for inode, block and bucket. 6315 * 6316 * NOTE: 6317 * Before we call this function, the caller has memcpy the xattr in 6318 * old_xh to the new_xh. 6319 * 6320 * If args.xattr_reflinked is set, call it to decide whether the xe should 6321 * be reflinked or not. If not, remove it from the new xattr header. 6322 */ 6323 static int ocfs2_reflink_xattr_header(handle_t *handle, 6324 struct ocfs2_xattr_reflink *args, 6325 struct buffer_head *old_bh, 6326 struct ocfs2_xattr_header *xh, 6327 struct buffer_head *new_bh, 6328 struct ocfs2_xattr_header *new_xh, 6329 struct ocfs2_xattr_value_buf *vb, 6330 struct ocfs2_alloc_context *meta_ac, 6331 get_xattr_value_root *func, 6332 void *para) 6333 { 6334 int ret = 0, i, j; 6335 struct super_block *sb = args->old_inode->i_sb; 6336 struct buffer_head *value_bh; 6337 struct ocfs2_xattr_entry *xe, *last; 6338 struct ocfs2_xattr_value_root *xv, *new_xv; 6339 struct ocfs2_extent_tree data_et; 6340 u32 clusters, cpos, p_cluster, num_clusters; 6341 unsigned int ext_flags = 0; 6342 6343 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr, 6344 le16_to_cpu(xh->xh_count)); 6345 6346 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; 6347 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6348 xe = &xh->xh_entries[i]; 6349 6350 if (args->xattr_reflinked && 
!args->xattr_reflinked(xe)) { 6351 xe = &new_xh->xh_entries[j]; 6352 6353 le16_add_cpu(&new_xh->xh_count, -1); 6354 if (new_xh->xh_count) { 6355 memmove(xe, xe + 1, 6356 (void *)last - (void *)xe); 6357 memset(last, 0, 6358 sizeof(struct ocfs2_xattr_entry)); 6359 } 6360 6361 /* 6362 * We don't want j to increase in the next round since 6363 * it is already moved ahead. 6364 */ 6365 j--; 6366 continue; 6367 } 6368 6369 if (ocfs2_xattr_is_local(xe)) 6370 continue; 6371 6372 ret = func(sb, old_bh, xh, i, &xv, NULL, para); 6373 if (ret) { 6374 mlog_errno(ret); 6375 break; 6376 } 6377 6378 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); 6379 if (ret) { 6380 mlog_errno(ret); 6381 break; 6382 } 6383 6384 /* 6385 * For the xattr which has l_tree_depth = 0, all the extent 6386 * recs have already be copied to the new xh with the 6387 * propriate OCFS2_EXT_REFCOUNTED flag we just need to 6388 * increase the refount count int the refcount tree. 6389 * 6390 * For the xattr which has l_tree_depth > 0, we need 6391 * to initialize it to the empty default value root, 6392 * and then insert the extents one by one. 
6393 */ 6394 if (xv->xr_list.l_tree_depth) { 6395 memcpy(new_xv, &def_xv, sizeof(def_xv)); 6396 vb->vb_xv = new_xv; 6397 vb->vb_bh = value_bh; 6398 ocfs2_init_xattr_value_extent_tree(&data_et, 6399 INODE_CACHE(args->new_inode), vb); 6400 } 6401 6402 clusters = le32_to_cpu(xv->xr_clusters); 6403 cpos = 0; 6404 while (cpos < clusters) { 6405 ret = ocfs2_xattr_get_clusters(args->old_inode, 6406 cpos, 6407 &p_cluster, 6408 &num_clusters, 6409 &xv->xr_list, 6410 &ext_flags); 6411 if (ret) { 6412 mlog_errno(ret); 6413 goto out; 6414 } 6415 6416 BUG_ON(!p_cluster); 6417 6418 if (xv->xr_list.l_tree_depth) { 6419 ret = ocfs2_insert_extent(handle, 6420 &data_et, cpos, 6421 ocfs2_clusters_to_blocks( 6422 args->old_inode->i_sb, 6423 p_cluster), 6424 num_clusters, ext_flags, 6425 meta_ac); 6426 if (ret) { 6427 mlog_errno(ret); 6428 goto out; 6429 } 6430 } 6431 6432 ret = ocfs2_increase_refcount(handle, args->ref_ci, 6433 args->ref_root_bh, 6434 p_cluster, num_clusters, 6435 meta_ac, args->dealloc); 6436 if (ret) { 6437 mlog_errno(ret); 6438 goto out; 6439 } 6440 6441 cpos += num_clusters; 6442 } 6443 } 6444 6445 out: 6446 return ret; 6447 } 6448 6449 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) 6450 { 6451 int ret = 0, credits = 0; 6452 handle_t *handle; 6453 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); 6454 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; 6455 int inline_size = le16_to_cpu(di->i_xattr_inline_size); 6456 int header_off = osb->sb->s_blocksize - inline_size; 6457 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *) 6458 (args->old_bh->b_data + header_off); 6459 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *) 6460 (args->new_bh->b_data + header_off); 6461 struct ocfs2_alloc_context *meta_ac = NULL; 6462 struct ocfs2_inode_info *new_oi; 6463 struct ocfs2_dinode *new_di; 6464 struct ocfs2_xattr_value_buf vb = { 6465 .vb_bh = args->new_bh, 6466 .vb_access = 
ocfs2_journal_access_di, 6467 }; 6468 6469 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6470 &credits, &meta_ac); 6471 if (ret) { 6472 mlog_errno(ret); 6473 goto out; 6474 } 6475 6476 handle = ocfs2_start_trans(osb, credits); 6477 if (IS_ERR(handle)) { 6478 ret = PTR_ERR(handle); 6479 mlog_errno(ret); 6480 goto out; 6481 } 6482 6483 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), 6484 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6485 if (ret) { 6486 mlog_errno(ret); 6487 goto out_commit; 6488 } 6489 6490 memcpy(args->new_bh->b_data + header_off, 6491 args->old_bh->b_data + header_off, inline_size); 6492 6493 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6494 new_di->i_xattr_inline_size = cpu_to_le16(inline_size); 6495 6496 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, 6497 args->new_bh, new_xh, &vb, meta_ac, 6498 ocfs2_get_xattr_value_root, NULL); 6499 if (ret) { 6500 mlog_errno(ret); 6501 goto out_commit; 6502 } 6503 6504 new_oi = OCFS2_I(args->new_inode); 6505 /* 6506 * Adjust extent record count to reserve space for extended attribute. 6507 * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). 
6508 */ 6509 if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && 6510 !(ocfs2_inode_is_fast_symlink(args->new_inode))) { 6511 struct ocfs2_extent_list *el = &new_di->id2.i_list; 6512 le16_add_cpu(&el->l_count, -(inline_size / 6513 sizeof(struct ocfs2_extent_rec))); 6514 } 6515 spin_lock(&new_oi->ip_lock); 6516 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; 6517 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6518 spin_unlock(&new_oi->ip_lock); 6519 6520 ocfs2_journal_dirty(handle, args->new_bh); 6521 6522 out_commit: 6523 ocfs2_commit_trans(osb, handle); 6524 6525 out: 6526 if (meta_ac) 6527 ocfs2_free_alloc_context(meta_ac); 6528 return ret; 6529 } 6530 6531 static int ocfs2_create_empty_xattr_block(struct inode *inode, 6532 struct buffer_head *fe_bh, 6533 struct buffer_head **ret_bh, 6534 int indexed) 6535 { 6536 int ret; 6537 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6538 struct ocfs2_xattr_set_ctxt ctxt; 6539 6540 memset(&ctxt, 0, sizeof(ctxt)); 6541 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); 6542 if (ret < 0) { 6543 mlog_errno(ret); 6544 return ret; 6545 } 6546 6547 ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); 6548 if (IS_ERR(ctxt.handle)) { 6549 ret = PTR_ERR(ctxt.handle); 6550 mlog_errno(ret); 6551 goto out; 6552 } 6553 6554 trace_ocfs2_create_empty_xattr_block( 6555 (unsigned long long)fe_bh->b_blocknr, indexed); 6556 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6557 ret_bh); 6558 if (ret) 6559 mlog_errno(ret); 6560 6561 ocfs2_commit_trans(osb, ctxt.handle); 6562 out: 6563 ocfs2_free_alloc_context(ctxt.meta_ac); 6564 return ret; 6565 } 6566 6567 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args, 6568 struct buffer_head *blk_bh, 6569 struct buffer_head *new_blk_bh) 6570 { 6571 int ret = 0, credits = 0; 6572 handle_t *handle; 6573 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode); 6574 struct ocfs2_dinode *new_di; 
6575 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb); 6576 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); 6577 struct ocfs2_xattr_block *xb = 6578 (struct ocfs2_xattr_block *)blk_bh->b_data; 6579 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header; 6580 struct ocfs2_xattr_block *new_xb = 6581 (struct ocfs2_xattr_block *)new_blk_bh->b_data; 6582 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header; 6583 struct ocfs2_alloc_context *meta_ac; 6584 struct ocfs2_xattr_value_buf vb = { 6585 .vb_bh = new_blk_bh, 6586 .vb_access = ocfs2_journal_access_xb, 6587 }; 6588 6589 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, 6590 &credits, &meta_ac); 6591 if (ret) { 6592 mlog_errno(ret); 6593 return ret; 6594 } 6595 6596 /* One more credits in case we need to add xattr flags in new inode. */ 6597 handle = ocfs2_start_trans(osb, credits + 1); 6598 if (IS_ERR(handle)) { 6599 ret = PTR_ERR(handle); 6600 mlog_errno(ret); 6601 goto out; 6602 } 6603 6604 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6605 ret = ocfs2_journal_access_di(handle, 6606 INODE_CACHE(args->new_inode), 6607 args->new_bh, 6608 OCFS2_JOURNAL_ACCESS_WRITE); 6609 if (ret) { 6610 mlog_errno(ret); 6611 goto out_commit; 6612 } 6613 } 6614 6615 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode), 6616 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); 6617 if (ret) { 6618 mlog_errno(ret); 6619 goto out_commit; 6620 } 6621 6622 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off, 6623 osb->sb->s_blocksize - header_off); 6624 6625 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh, 6626 new_blk_bh, new_xh, &vb, meta_ac, 6627 ocfs2_get_xattr_value_root, NULL); 6628 if (ret) { 6629 mlog_errno(ret); 6630 goto out_commit; 6631 } 6632 6633 ocfs2_journal_dirty(handle, new_blk_bh); 6634 6635 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { 6636 new_di = (struct ocfs2_dinode *)args->new_bh->b_data; 6637 
spin_lock(&new_oi->ip_lock); 6638 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL; 6639 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); 6640 spin_unlock(&new_oi->ip_lock); 6641 6642 ocfs2_journal_dirty(handle, args->new_bh); 6643 } 6644 6645 out_commit: 6646 ocfs2_commit_trans(osb, handle); 6647 6648 out: 6649 ocfs2_free_alloc_context(meta_ac); 6650 return ret; 6651 } 6652 6653 struct ocfs2_reflink_xattr_tree_args { 6654 struct ocfs2_xattr_reflink *reflink; 6655 struct buffer_head *old_blk_bh; 6656 struct buffer_head *new_blk_bh; 6657 struct ocfs2_xattr_bucket *old_bucket; 6658 struct ocfs2_xattr_bucket *new_bucket; 6659 }; 6660 6661 /* 6662 * NOTE: 6663 * We have to handle the case that both old bucket and new bucket 6664 * will call this function to get the right ret_bh. 6665 * So The caller must give us the right bh. 6666 */ 6667 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb, 6668 struct buffer_head *bh, 6669 struct ocfs2_xattr_header *xh, 6670 int offset, 6671 struct ocfs2_xattr_value_root **xv, 6672 struct buffer_head **ret_bh, 6673 void *para) 6674 { 6675 struct ocfs2_reflink_xattr_tree_args *args = 6676 (struct ocfs2_reflink_xattr_tree_args *)para; 6677 struct ocfs2_xattr_bucket *bucket; 6678 6679 if (bh == args->old_bucket->bu_bhs[0]) 6680 bucket = args->old_bucket; 6681 else 6682 bucket = args->new_bucket; 6683 6684 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6685 xv, ret_bh); 6686 } 6687 6688 struct ocfs2_value_tree_metas { 6689 int num_metas; 6690 int credits; 6691 int num_recs; 6692 }; 6693 6694 static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb, 6695 struct buffer_head *bh, 6696 struct ocfs2_xattr_header *xh, 6697 int offset, 6698 struct ocfs2_xattr_value_root **xv, 6699 struct buffer_head **ret_bh, 6700 void *para) 6701 { 6702 struct ocfs2_xattr_bucket *bucket = 6703 (struct ocfs2_xattr_bucket *)para; 6704 6705 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, 6706 xv, 
ret_bh); 6707 } 6708 6709 static int ocfs2_calc_value_tree_metas(struct inode *inode, 6710 struct ocfs2_xattr_bucket *bucket, 6711 void *para) 6712 { 6713 struct ocfs2_value_tree_metas *metas = 6714 (struct ocfs2_value_tree_metas *)para; 6715 struct ocfs2_xattr_header *xh = 6716 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; 6717 6718 /* Add the credits for this bucket first. */ 6719 metas->credits += bucket->bu_blocks; 6720 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0], 6721 xh, &metas->num_metas, 6722 &metas->credits, &metas->num_recs, 6723 ocfs2_value_tree_metas_in_bucket, 6724 bucket); 6725 } 6726 6727 /* 6728 * Given a xattr extent rec starting from blkno and having len clusters, 6729 * iterate all the buckets calculate how much metadata we need for reflinking 6730 * all the ocfs2_xattr_value_root and lock the allocators accordingly. 6731 */ 6732 static int ocfs2_lock_reflink_xattr_rec_allocators( 6733 struct ocfs2_reflink_xattr_tree_args *args, 6734 struct ocfs2_extent_tree *xt_et, 6735 u64 blkno, u32 len, int *credits, 6736 struct ocfs2_alloc_context **meta_ac, 6737 struct ocfs2_alloc_context **data_ac) 6738 { 6739 int ret, num_free_extents; 6740 struct ocfs2_value_tree_metas metas; 6741 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb); 6742 struct ocfs2_refcount_block *rb; 6743 6744 memset(&metas, 0, sizeof(metas)); 6745 6746 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len, 6747 ocfs2_calc_value_tree_metas, &metas); 6748 if (ret) { 6749 mlog_errno(ret); 6750 goto out; 6751 } 6752 6753 *credits = metas.credits; 6754 6755 /* 6756 * Calculate we need for refcount tree change. 6757 * 6758 * We need to add/modify num_recs in refcount tree, so just calculate 6759 * an approximate number we need for refcount tree change. 6760 * Sometimes we need to split the tree, and after split, half recs 6761 * will be moved to the new block, and a new block can only provide 6762 * half number of recs. 
So we multiple new blocks by 2. 6763 * In the end, we have to add credits for modifying the already 6764 * existed refcount block. 6765 */ 6766 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data; 6767 metas.num_recs = 6768 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) / 6769 ocfs2_refcount_recs_per_rb(osb->sb) * 2; 6770 metas.num_metas += metas.num_recs; 6771 *credits += metas.num_recs + 6772 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 6773 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 6774 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * 6775 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; 6776 else 6777 *credits += 1; 6778 6779 /* count in the xattr tree change. */ 6780 num_free_extents = ocfs2_num_free_extents(osb, xt_et); 6781 if (num_free_extents < 0) { 6782 ret = num_free_extents; 6783 mlog_errno(ret); 6784 goto out; 6785 } 6786 6787 if (num_free_extents < len) 6788 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); 6789 6790 *credits += ocfs2_calc_extend_credits(osb->sb, 6791 xt_et->et_root_el); 6792 6793 if (metas.num_metas) { 6794 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, 6795 meta_ac); 6796 if (ret) { 6797 mlog_errno(ret); 6798 goto out; 6799 } 6800 } 6801 6802 if (len) { 6803 ret = ocfs2_reserve_clusters(osb, len, data_ac); 6804 if (ret) 6805 mlog_errno(ret); 6806 } 6807 out: 6808 if (ret) { 6809 if (*meta_ac) { 6810 ocfs2_free_alloc_context(*meta_ac); 6811 *meta_ac = NULL; 6812 } 6813 } 6814 6815 return ret; 6816 } 6817 6818 static int ocfs2_reflink_xattr_bucket(handle_t *handle, 6819 u64 blkno, u64 new_blkno, u32 clusters, 6820 u32 *cpos, int num_buckets, 6821 struct ocfs2_alloc_context *meta_ac, 6822 struct ocfs2_alloc_context *data_ac, 6823 struct ocfs2_reflink_xattr_tree_args *args) 6824 { 6825 int i, j, ret = 0; 6826 struct super_block *sb = args->reflink->old_inode->i_sb; 6827 int bpb = args->old_bucket->bu_blocks; 6828 struct ocfs2_xattr_value_buf vb = { 6829 
.vb_access = ocfs2_journal_access, 6830 }; 6831 6832 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) { 6833 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6834 if (ret) { 6835 mlog_errno(ret); 6836 break; 6837 } 6838 6839 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1); 6840 if (ret) { 6841 mlog_errno(ret); 6842 break; 6843 } 6844 6845 ret = ocfs2_xattr_bucket_journal_access(handle, 6846 args->new_bucket, 6847 OCFS2_JOURNAL_ACCESS_CREATE); 6848 if (ret) { 6849 mlog_errno(ret); 6850 break; 6851 } 6852 6853 for (j = 0; j < bpb; j++) 6854 memcpy(bucket_block(args->new_bucket, j), 6855 bucket_block(args->old_bucket, j), 6856 sb->s_blocksize); 6857 6858 /* 6859 * Record the start cpos so that we can use it to initialize 6860 * our xattr tree we also set the xh_num_bucket for the new 6861 * bucket. 6862 */ 6863 if (i == 0) { 6864 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> 6865 xh_entries[0].xe_name_hash); 6866 bucket_xh(args->new_bucket)->xh_num_buckets = 6867 cpu_to_le16(num_buckets); 6868 } 6869 6870 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6871 6872 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6873 args->old_bucket->bu_bhs[0], 6874 bucket_xh(args->old_bucket), 6875 args->new_bucket->bu_bhs[0], 6876 bucket_xh(args->new_bucket), 6877 &vb, meta_ac, 6878 ocfs2_get_reflink_xattr_value_root, 6879 args); 6880 if (ret) { 6881 mlog_errno(ret); 6882 break; 6883 } 6884 6885 /* 6886 * Re-access and dirty the bucket to calculate metaecc. 6887 * Because we may extend the transaction in reflink_xattr_header 6888 * which will let the already accessed block gone. 
6889 */ 6890 ret = ocfs2_xattr_bucket_journal_access(handle, 6891 args->new_bucket, 6892 OCFS2_JOURNAL_ACCESS_WRITE); 6893 if (ret) { 6894 mlog_errno(ret); 6895 break; 6896 } 6897 6898 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6899 6900 ocfs2_xattr_bucket_relse(args->old_bucket); 6901 ocfs2_xattr_bucket_relse(args->new_bucket); 6902 } 6903 6904 ocfs2_xattr_bucket_relse(args->old_bucket); 6905 ocfs2_xattr_bucket_relse(args->new_bucket); 6906 return ret; 6907 } 6908 6909 static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6910 struct inode *inode, 6911 struct ocfs2_reflink_xattr_tree_args *args, 6912 struct ocfs2_extent_tree *et, 6913 struct ocfs2_alloc_context *meta_ac, 6914 struct ocfs2_alloc_context *data_ac, 6915 u64 blkno, u32 cpos, u32 len) 6916 { 6917 int ret, first_inserted = 0; 6918 u32 p_cluster, num_clusters, reflink_cpos = 0; 6919 u64 new_blkno; 6920 unsigned int num_buckets, reflink_buckets; 6921 unsigned int bpc = 6922 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 6923 6924 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); 6925 if (ret) { 6926 mlog_errno(ret); 6927 goto out; 6928 } 6929 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); 6930 ocfs2_xattr_bucket_relse(args->old_bucket); 6931 6932 while (len && num_buckets) { 6933 ret = ocfs2_claim_clusters(handle, data_ac, 6934 1, &p_cluster, &num_clusters); 6935 if (ret) { 6936 mlog_errno(ret); 6937 goto out; 6938 } 6939 6940 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 6941 reflink_buckets = min(num_buckets, bpc * num_clusters); 6942 6943 ret = ocfs2_reflink_xattr_bucket(handle, blkno, 6944 new_blkno, num_clusters, 6945 &reflink_cpos, reflink_buckets, 6946 meta_ac, data_ac, args); 6947 if (ret) { 6948 mlog_errno(ret); 6949 goto out; 6950 } 6951 6952 /* 6953 * For the 1st allocated cluster, we make it use the same cpos 6954 * so that the xattr tree looks the same as the original one 6955 * in the most case. 
6956 */ 6957 if (!first_inserted) { 6958 reflink_cpos = cpos; 6959 first_inserted = 1; 6960 } 6961 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, 6962 num_clusters, 0, meta_ac); 6963 if (ret) 6964 mlog_errno(ret); 6965 6966 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno, 6967 num_clusters, reflink_cpos); 6968 6969 len -= num_clusters; 6970 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6971 num_buckets -= reflink_buckets; 6972 } 6973 out: 6974 return ret; 6975 } 6976 6977 /* 6978 * Create the same xattr extent record in the new inode's xattr tree. 6979 */ 6980 static int ocfs2_reflink_xattr_rec(struct inode *inode, 6981 struct buffer_head *root_bh, 6982 u64 blkno, 6983 u32 cpos, 6984 u32 len, 6985 void *para) 6986 { 6987 int ret, credits = 0; 6988 handle_t *handle; 6989 struct ocfs2_reflink_xattr_tree_args *args = 6990 (struct ocfs2_reflink_xattr_tree_args *)para; 6991 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6992 struct ocfs2_alloc_context *meta_ac = NULL; 6993 struct ocfs2_alloc_context *data_ac = NULL; 6994 struct ocfs2_extent_tree et; 6995 6996 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len); 6997 6998 ocfs2_init_xattr_tree_extent_tree(&et, 6999 INODE_CACHE(args->reflink->new_inode), 7000 args->new_blk_bh); 7001 7002 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, 7003 len, &credits, 7004 &meta_ac, &data_ac); 7005 if (ret) { 7006 mlog_errno(ret); 7007 goto out; 7008 } 7009 7010 handle = ocfs2_start_trans(osb, credits); 7011 if (IS_ERR(handle)) { 7012 ret = PTR_ERR(handle); 7013 mlog_errno(ret); 7014 goto out; 7015 } 7016 7017 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, 7018 meta_ac, data_ac, 7019 blkno, cpos, len); 7020 if (ret) 7021 mlog_errno(ret); 7022 7023 ocfs2_commit_trans(osb, handle); 7024 7025 out: 7026 if (meta_ac) 7027 ocfs2_free_alloc_context(meta_ac); 7028 if (data_ac) 7029 ocfs2_free_alloc_context(data_ac); 7030 return ret; 7031 } 7032 7033 /* 
7034 * Create reflinked xattr buckets. 7035 * We will add bucket one by one, and refcount all the xattrs in the bucket 7036 * if they are stored outside. 7037 */ 7038 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, 7039 struct buffer_head *blk_bh, 7040 struct buffer_head *new_blk_bh) 7041 { 7042 int ret; 7043 struct ocfs2_reflink_xattr_tree_args para; 7044 7045 memset(¶, 0, sizeof(para)); 7046 para.reflink = args; 7047 para.old_blk_bh = blk_bh; 7048 para.new_blk_bh = new_blk_bh; 7049 7050 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); 7051 if (!para.old_bucket) { 7052 mlog_errno(-ENOMEM); 7053 return -ENOMEM; 7054 } 7055 7056 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); 7057 if (!para.new_bucket) { 7058 ret = -ENOMEM; 7059 mlog_errno(ret); 7060 goto out; 7061 } 7062 7063 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, 7064 ocfs2_reflink_xattr_rec, 7065 ¶); 7066 if (ret) 7067 mlog_errno(ret); 7068 7069 out: 7070 ocfs2_xattr_bucket_free(para.old_bucket); 7071 ocfs2_xattr_bucket_free(para.new_bucket); 7072 return ret; 7073 } 7074 7075 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, 7076 struct buffer_head *blk_bh) 7077 { 7078 int ret, indexed = 0; 7079 struct buffer_head *new_blk_bh = NULL; 7080 struct ocfs2_xattr_block *xb = 7081 (struct ocfs2_xattr_block *)blk_bh->b_data; 7082 7083 7084 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) 7085 indexed = 1; 7086 7087 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, 7088 &new_blk_bh, indexed); 7089 if (ret) { 7090 mlog_errno(ret); 7091 goto out; 7092 } 7093 7094 if (!indexed) 7095 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); 7096 else 7097 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); 7098 if (ret) 7099 mlog_errno(ret); 7100 7101 out: 7102 brelse(new_blk_bh); 7103 return ret; 7104 } 7105 7106 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) 7107 { 7108 int type = 
ocfs2_xattr_get_type(xe); 7109 7110 return type != OCFS2_XATTR_INDEX_SECURITY && 7111 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && 7112 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; 7113 } 7114 7115 int ocfs2_reflink_xattrs(struct inode *old_inode, 7116 struct buffer_head *old_bh, 7117 struct inode *new_inode, 7118 struct buffer_head *new_bh, 7119 bool preserve_security) 7120 { 7121 int ret; 7122 struct ocfs2_xattr_reflink args; 7123 struct ocfs2_inode_info *oi = OCFS2_I(old_inode); 7124 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; 7125 struct buffer_head *blk_bh = NULL; 7126 struct ocfs2_cached_dealloc_ctxt dealloc; 7127 struct ocfs2_refcount_tree *ref_tree; 7128 struct buffer_head *ref_root_bh = NULL; 7129 7130 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7131 le64_to_cpu(di->i_refcount_loc), 7132 1, &ref_tree, &ref_root_bh); 7133 if (ret) { 7134 mlog_errno(ret); 7135 goto out; 7136 } 7137 7138 ocfs2_init_dealloc_ctxt(&dealloc); 7139 7140 args.old_inode = old_inode; 7141 args.new_inode = new_inode; 7142 args.old_bh = old_bh; 7143 args.new_bh = new_bh; 7144 args.ref_ci = &ref_tree->rf_ci; 7145 args.ref_root_bh = ref_root_bh; 7146 args.dealloc = &dealloc; 7147 if (preserve_security) 7148 args.xattr_reflinked = NULL; 7149 else 7150 args.xattr_reflinked = ocfs2_reflink_xattr_no_security; 7151 7152 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 7153 ret = ocfs2_reflink_xattr_inline(&args); 7154 if (ret) { 7155 mlog_errno(ret); 7156 goto out_unlock; 7157 } 7158 } 7159 7160 if (!di->i_xattr_loc) 7161 goto out_unlock; 7162 7163 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), 7164 &blk_bh); 7165 if (ret < 0) { 7166 mlog_errno(ret); 7167 goto out_unlock; 7168 } 7169 7170 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); 7171 if (ret) 7172 mlog_errno(ret); 7173 7174 brelse(blk_bh); 7175 7176 out_unlock: 7177 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), 7178 ref_tree, 1); 7179 brelse(ref_root_bh); 7180 7181 
if (ocfs2_dealloc_has_cluster(&dealloc)) { 7182 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); 7183 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); 7184 } 7185 7186 out: 7187 return ret; 7188 } 7189 7190 /* 7191 * Initialize security and acl for a already created inode. 7192 * Used for reflink a non-preserve-security file. 7193 * 7194 * It uses common api like ocfs2_xattr_set, so the caller 7195 * must not hold any lock expect i_mutex. 7196 */ 7197 int ocfs2_init_security_and_acl(struct inode *dir, 7198 struct inode *inode, 7199 const struct qstr *qstr, 7200 struct posix_acl *default_acl, 7201 struct posix_acl *acl) 7202 { 7203 struct buffer_head *dir_bh = NULL; 7204 int ret = 0; 7205 7206 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7207 if (ret) { 7208 mlog_errno(ret); 7209 goto leave; 7210 } 7211 7212 ret = ocfs2_inode_lock(dir, &dir_bh, 0); 7213 if (ret) { 7214 mlog_errno(ret); 7215 goto leave; 7216 } 7217 7218 if (!ret && default_acl) 7219 ret = ocfs2_iop_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); 7220 if (!ret && acl) 7221 ret = ocfs2_iop_set_acl(inode, acl, ACL_TYPE_ACCESS); 7222 7223 ocfs2_inode_unlock(dir, 0); 7224 brelse(dir_bh); 7225 leave: 7226 return ret; 7227 } 7228 /* 7229 * 'security' attributes support 7230 */ 7231 static size_t ocfs2_xattr_security_list(const struct xattr_handler *handler, 7232 struct dentry *dentry, char *list, 7233 size_t list_size, const char *name, 7234 size_t name_len) 7235 { 7236 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 7237 const size_t total_len = prefix_len + name_len + 1; 7238 7239 if (list && total_len <= list_size) { 7240 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); 7241 memcpy(list + prefix_len, name, name_len); 7242 list[prefix_len + name_len] = '\0'; 7243 } 7244 return total_len; 7245 } 7246 7247 static int ocfs2_xattr_security_get(const struct xattr_handler *handler, 7248 struct dentry *dentry, const char *name, 7249 void *buffer, size_t size) 7250 { 7251 
return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY, 7252 name, buffer, size); 7253 } 7254 7255 static int ocfs2_xattr_security_set(const struct xattr_handler *handler, 7256 struct dentry *dentry, const char *name, 7257 const void *value, size_t size, int flags) 7258 { 7259 return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY, 7260 name, value, size, flags); 7261 } 7262 7263 static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 7264 void *fs_info) 7265 { 7266 const struct xattr *xattr; 7267 int err = 0; 7268 7269 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 7270 err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7271 xattr->name, xattr->value, 7272 xattr->value_len, XATTR_CREATE); 7273 if (err) 7274 break; 7275 } 7276 return err; 7277 } 7278 7279 int ocfs2_init_security_get(struct inode *inode, 7280 struct inode *dir, 7281 const struct qstr *qstr, 7282 struct ocfs2_security_xattr_info *si) 7283 { 7284 /* check whether ocfs2 support feature xattr */ 7285 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7286 return -EOPNOTSUPP; 7287 if (si) 7288 return security_old_inode_init_security(inode, dir, qstr, 7289 &si->name, &si->value, 7290 &si->value_len); 7291 7292 return security_inode_init_security(inode, dir, qstr, 7293 &ocfs2_initxattrs, NULL); 7294 } 7295 7296 int ocfs2_init_security_set(handle_t *handle, 7297 struct inode *inode, 7298 struct buffer_head *di_bh, 7299 struct ocfs2_security_xattr_info *si, 7300 struct ocfs2_alloc_context *xattr_ac, 7301 struct ocfs2_alloc_context *data_ac) 7302 { 7303 return ocfs2_xattr_set_handle(handle, inode, di_bh, 7304 OCFS2_XATTR_INDEX_SECURITY, 7305 si->name, si->value, si->value_len, 0, 7306 xattr_ac, data_ac); 7307 } 7308 7309 const struct xattr_handler ocfs2_xattr_security_handler = { 7310 .prefix = XATTR_SECURITY_PREFIX, 7311 .list = ocfs2_xattr_security_list, 7312 .get = ocfs2_xattr_security_get, 7313 .set = ocfs2_xattr_security_set, 7314 }; 7315 
7316 /* 7317 * 'trusted' attributes support 7318 */ 7319 static size_t ocfs2_xattr_trusted_list(const struct xattr_handler *handler, 7320 struct dentry *dentry, char *list, 7321 size_t list_size, const char *name, 7322 size_t name_len) 7323 { 7324 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 7325 const size_t total_len = prefix_len + name_len + 1; 7326 7327 if (!capable(CAP_SYS_ADMIN)) 7328 return 0; 7329 7330 if (list && total_len <= list_size) { 7331 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); 7332 memcpy(list + prefix_len, name, name_len); 7333 list[prefix_len + name_len] = '\0'; 7334 } 7335 return total_len; 7336 } 7337 7338 static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, 7339 struct dentry *dentry, const char *name, 7340 void *buffer, size_t size) 7341 { 7342 return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED, 7343 name, buffer, size); 7344 } 7345 7346 static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler, 7347 struct dentry *dentry, const char *name, 7348 const void *value, size_t size, int flags) 7349 { 7350 return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED, 7351 name, value, size, flags); 7352 } 7353 7354 const struct xattr_handler ocfs2_xattr_trusted_handler = { 7355 .prefix = XATTR_TRUSTED_PREFIX, 7356 .list = ocfs2_xattr_trusted_list, 7357 .get = ocfs2_xattr_trusted_get, 7358 .set = ocfs2_xattr_trusted_set, 7359 }; 7360 7361 /* 7362 * 'user' attributes support 7363 */ 7364 static size_t ocfs2_xattr_user_list(const struct xattr_handler *handler, 7365 struct dentry *dentry, char *list, 7366 size_t list_size, const char *name, 7367 size_t name_len) 7368 { 7369 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 7370 const size_t total_len = prefix_len + name_len + 1; 7371 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7372 7373 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7374 return 0; 7375 7376 if (list && total_len <= list_size) { 7377 memcpy(list, XATTR_USER_PREFIX, 
prefix_len); 7378 memcpy(list + prefix_len, name, name_len); 7379 list[prefix_len + name_len] = '\0'; 7380 } 7381 return total_len; 7382 } 7383 7384 static int ocfs2_xattr_user_get(const struct xattr_handler *handler, 7385 struct dentry *dentry, const char *name, 7386 void *buffer, size_t size) 7387 { 7388 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7389 7390 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7391 return -EOPNOTSUPP; 7392 return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_USER, name, 7393 buffer, size); 7394 } 7395 7396 static int ocfs2_xattr_user_set(const struct xattr_handler *handler, 7397 struct dentry *dentry, const char *name, 7398 const void *value, size_t size, int flags) 7399 { 7400 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 7401 7402 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7403 return -EOPNOTSUPP; 7404 7405 return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_USER, 7406 name, value, size, flags); 7407 } 7408 7409 const struct xattr_handler ocfs2_xattr_user_handler = { 7410 .prefix = XATTR_USER_PREFIX, 7411 .list = ocfs2_xattr_user_list, 7412 .get = ocfs2_xattr_user_get, 7413 .set = ocfs2_xattr_user_set, 7414 }; 7415