1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * linux/fs/ext4/xattr.c 4 * 5 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 6 * 7 * Fix by Harrison Xing <harrison@mountainviewdata.com>. 8 * Ext4 code with a lot of help from Eric Jarman <ejarman@acm.org>. 9 * Extended attributes for symlinks and special files added per 10 * suggestion of Luka Renko <luka.renko@hermes.si>. 11 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>, 12 * Red Hat Inc. 13 * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz 14 * and Andreas Gruenbacher <agruen@suse.de>. 15 */ 16 17 /* 18 * Extended attributes are stored directly in inodes (on file systems with 19 * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl 20 * field contains the block number if an inode uses an additional block. All 21 * attributes must fit in the inode and one additional block. Blocks that 22 * contain the identical set of attributes may be shared among several inodes. 23 * Identical blocks are detected by keeping a cache of blocks that have 24 * recently been accessed. 25 * 26 * The attributes in inodes and on blocks have a different header; the entries 27 * are stored in the same format: 28 * 29 * +------------------+ 30 * | header | 31 * | entry 1 | | 32 * | entry 2 | | growing downwards 33 * | entry 3 | v 34 * | four null bytes | 35 * | . . . | 36 * | value 1 | ^ 37 * | value 3 | | growing upwards 38 * | value 2 | | 39 * +------------------+ 40 * 41 * The header is followed by multiple entry descriptors. In disk blocks, the 42 * entry descriptors are kept sorted. In inodes, they are unsorted. The 43 * attribute values are aligned to the end of the block in no specific order. 44 * 45 * Locking strategy 46 * ---------------- 47 * EXT4_I(inode)->i_file_acl is protected by EXT4_I(inode)->xattr_sem. 
 * EA blocks are only changed if they are exclusive to an inode, so
 * holding xattr_sem also means that nothing but the EA block's reference
 * count can change. Multiple writers to the same block are synchronized
 * by the buffer lock.
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/mbcache.h>
#include <linux/quotaops.h>
#include <linux/iversion.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
#include "acl.h"

/*
 * Debug helpers: with EXT4_XATTR_DEBUG defined they print a KERN_DEBUG
 * message prefixed with the inode (ea_idebug) or the buffer's block
 * (ea_bdebug); otherwise they expand to no_printk().
 */
#ifdef EXT4_XATTR_DEBUG
# define ea_idebug(inode, fmt, ...)					\
	printk(KERN_DEBUG "inode %s:%lu: " fmt "\n",			\
	       inode->i_sb->s_id, inode->i_ino, ##__VA_ARGS__)
# define ea_bdebug(bh, fmt, ...)					\
	printk(KERN_DEBUG "block %pg:%lu: " fmt "\n",			\
	       bh->b_bdev, (unsigned long)bh->b_blocknr, ##__VA_ARGS__)
#else
# define ea_idebug(inode, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
# define ea_bdebug(bh, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
#endif

/* Forward declarations of helpers used before their definition. */
static void ext4_xattr_block_cache_insert(struct mb_cache *,
					  struct buffer_head *);
static struct buffer_head *
ext4_xattr_block_cache_find(struct inode *, struct ext4_xattr_header *,
			    struct mb_cache_entry **);
static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value,
				    size_t value_count);
static __le32 ext4_xattr_hash_entry_signed(char *name, size_t name_len, __le32 *value,
				    size_t value_count);
static void ext4_xattr_rehash(struct ext4_xattr_header *);

/*
 * Handler lookup table indexed by the e_name_index value stored in an
 * entry.  Slots that are not initialised here (including those compiled
 * out by the #ifdefs) are NULL.
 */
static const struct xattr_handler * const ext4_xattr_handler_map[] = {
	[EXT4_XATTR_INDEX_USER]		     = &ext4_xattr_user_handler,
#ifdef CONFIG_EXT4_FS_POSIX_ACL
	[EXT4_XATTR_INDEX_POSIX_ACL_ACCESS]  = &posix_acl_access_xattr_handler,
	[EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
#endif
	[EXT4_XATTR_INDEX_TRUSTED]	     = &ext4_xattr_trusted_handler,
#ifdef CONFIG_EXT4_FS_SECURITY
	[EXT4_XATTR_INDEX_SECURITY]	     = &ext4_xattr_security_handler,
#endif
	[EXT4_XATTR_INDEX_HURD]		     = &ext4_xattr_hurd_handler,
};

/* NULL-terminated list of the handlers above (non-static: exported). */
const struct xattr_handler *ext4_xattr_handlers[] = {
	&ext4_xattr_user_handler,
	&ext4_xattr_trusted_handler,
#ifdef CONFIG_EXT4_FS_POSIX_ACL
	&posix_acl_access_xattr_handler,
	&posix_acl_default_xattr_handler,
#endif
#ifdef CONFIG_EXT4_FS_SECURITY
	&ext4_xattr_security_handler,
#endif
	&ext4_xattr_hurd_handler,
	NULL
};

/* Per-superblock mbcache used for xattr blocks. */
#define EA_BLOCK_CACHE(inode)	(((struct ext4_sb_info *) \
				inode->i_sb->s_fs_info)->s_ea_block_cache)

/* Per-superblock mbcache used for EA inodes. */
#define EA_INODE_CACHE(inode)	(((struct ext4_sb_info *) \
				inode->i_sb->s_fs_info)->s_ea_inode_cache)

static int
ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
			struct inode *inode);

#ifdef CONFIG_LOCKDEP
/* Put the EA inode's i_rwsem into lockdep subclass 1. */
void ext4_xattr_inode_set_class(struct inode *ea_inode)
{
	lockdep_set_subclass(&ea_inode->i_rwsem, 1);
}
#endif

/*
 * Compute the checksum of an xattr block.
 *
 * The checksum is seeded with sbi->s_csum_seed and covers, in order: the
 * little-endian 64-bit block number, the header bytes preceding
 * h_checksum, four zero bytes standing in for h_checksum itself, and the
 * remainder of the block up to EXT4_BLOCK_SIZE.
 *
 * Returns the checksum as a little-endian 32-bit value.
 */
static __le32 ext4_xattr_block_csum(struct inode *inode,
				    sector_t block_nr,
				    struct ext4_xattr_header *hdr)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	__u32 csum;
	__le64 dsk_block_nr = cpu_to_le64(block_nr);
	__u32 dummy_csum = 0;
	int offset = offsetof(struct ext4_xattr_header, h_checksum);

	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&dsk_block_nr,
			   sizeof(dsk_block_nr));
	csum = ext4_chksum(sbi, csum, (__u8 *)hdr, offset);
	/* Feed zeroes in place of the stored checksum field. */
	csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum));
	offset += sizeof(dummy_csum);
	csum = ext4_chksum(sbi, csum, (__u8 *)hdr + offset,
			   EXT4_BLOCK_SIZE(inode->i_sb) - offset);

	return cpu_to_le32(csum);
}

/*
 * Check the checksum stored in an xattr block.
 *
 * Returns 1 if the filesystem has no metadata checksums or if the stored
 * h_checksum equals the computed one, 0 otherwise.  The comparison is
 * done with the buffer locked.
 */
static int ext4_xattr_block_csum_verify(struct inode *inode,
					struct buffer_head *bh)
{
	struct ext4_xattr_header *hdr = BHDR(bh);
	int ret = 1;

	if (ext4_has_metadata_csum(inode->i_sb)) {
		lock_buffer(bh);
		ret = (hdr->h_checksum == ext4_xattr_block_csum(inode,
							bh->b_blocknr, hdr));
		unlock_buffer(bh);
	}
	return ret;
}

/*
 * Store a freshly computed checksum in the block header; does nothing
 * when the filesystem has no metadata checksums.  This function does not
 * take the buffer lock itself.
 */
static void ext4_xattr_block_csum_set(struct inode *inode,
				      struct buffer_head *bh)
{
	if (ext4_has_metadata_csum(inode->i_sb))
		BHDR(bh)->h_checksum = ext4_xattr_block_csum(inode,
						bh->b_blocknr, BHDR(bh));
}

/*
 * Map an on-disk name index to its handler.
 *
 * Returns NULL for index 0 or below, for an index beyond the table, and
 * for table slots that were left uninitialised.
 */
static inline const struct xattr_handler *
ext4_xattr_handler(int name_index)
{
	const struct xattr_handler *handler = NULL;

	if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map))
		handler = ext4_xattr_handler_map[name_index];
	return handler;
}

/*
 * Validate a list of xattr entries and the values they point to.
 *
 * @inode:       inode owning the attributes (used for feature checks and
 *               error reporting)
 * @bh:          buffer of the xattr block, or NULL when checking the
 *               in-inode area
 * @entry:       first entry of the list
 * @end:         first byte past the region holding entries and values
 * @value_start: base address that e_value_offs is relative to
 * @function, @line: caller location passed on to the error report
 *
 * For a block, the header magic and h_blocks are checked first; a buffer
 * already flagged verified is accepted without further checks, otherwise
 * the checksum is verified.  For the in-inode case the ibody header is
 * taken to sit immediately before @value_start and its size and magic
 * are checked.
 *
 * Returns 0 if everything is consistent (and flags @bh verified), or
 * -EFSCORRUPTED / -EFSBADCRC after reporting the problem through
 * __ext4_error_inode().
 */
static int
check_xattrs(struct inode *inode, struct buffer_head *bh,
	     struct ext4_xattr_entry *entry, void *end, void *value_start,
	     const char *function, unsigned int line)
{
	struct ext4_xattr_entry *e = entry;
	int err = -EFSCORRUPTED;
	char *err_str;

	if (bh) {
		if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
		    BHDR(bh)->h_blocks != cpu_to_le32(1)) {
			err_str = "invalid header";
			goto errout;
		}
		if (buffer_verified(bh))
			return 0;
		if (!ext4_xattr_block_csum_verify(inode, bh)) {
			err = -EFSBADCRC;
			err_str = "invalid checksum";
			goto errout;
		}
	} else {
		struct ext4_xattr_ibody_header *header = value_start;

		/* Step back from the first entry to the ibody header. */
		header -= 1;
		if (end - (void *)header < sizeof(*header) + sizeof(u32)) {
			err_str = "in-inode xattr block too small";
			goto errout;
		}
		if (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
			err_str = "bad magic number in in-inode xattr";
			goto errout;
		}
	}

	/* Find the end of the names list */
	while (!IS_LAST_ENTRY(e)) {
		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
		if ((void *)next >= end) {
			err_str = "e_name out of bounds";
			goto errout;
		}
		/* The name must not contain a NUL within e_name_len bytes. */
		if (strnlen(e->e_name, e->e_name_len) != e->e_name_len) {
			err_str = "bad e_name length";
			goto errout;
		}
		e = next;
	}

	/*
	 * Check the values.  At this point 'e' is the terminating entry of
	 * the names list, which the value checks below use as lower bound.
	 */
	while (!IS_LAST_ENTRY(entry)) {
		u32 size = le32_to_cpu(entry->e_value_size);
		unsigned long ea_ino = le32_to_cpu(entry->e_value_inum);

		if (!ext4_has_feature_ea_inode(inode->i_sb) && ea_ino) {
			err_str = "ea_inode specified without ea_inode feature enabled";
			goto errout;
		}
		if (ea_ino && ((ea_ino == EXT4_ROOT_INO) ||
			       !ext4_valid_inum(inode->i_sb, ea_ino))) {
			err_str = "invalid ea_ino";
			goto errout;
		}
		if (size > EXT4_XATTR_SIZE_MAX) {
			err_str = "e_value size too large";
			goto errout;
		}

		/* Only values stored inline (not in an EA inode) have an offset. */
		if (size != 0 && entry->e_value_inum == 0) {
			u16 offs = le16_to_cpu(entry->e_value_offs);
			void *value;

			/*
			 * The value cannot overlap the names, and the value
			 * with padding cannot extend beyond 'end'.  Check both
			 * the padded and unpadded sizes, since the size may
			 * overflow to 0 when adding padding.
			 */
			if (offs > end - value_start) {
				err_str = "e_value out of bounds";
				goto errout;
			}
			value = value_start + offs;
			if (value < (void *)e + sizeof(u32) ||
			    size > end - value ||
			    EXT4_XATTR_SIZE(size) > end - value) {
				err_str = "overlapping e_value ";
				goto errout;
			}
		}
		entry = EXT4_XATTR_NEXT(entry);
	}
	if (bh)
		set_buffer_verified(bh);
	return 0;

errout:
	/* __ext4_error_inode() takes the error as a positive number. */
	if (bh)
		__ext4_error_inode(inode, function, line, 0, -err,
				   "corrupted xattr block %llu: %s",
				   (unsigned long long) bh->b_blocknr,
				   err_str);
	else
		__ext4_error_inode(inode, function, line, 0, -err,
				   "corrupted in-inode xattr: %s", err_str);
	return err;
}

/*
 * Validate a whole xattr block: entries start at BFIRST(bh), the region
 * ends at the end of the buffer, and value offsets are relative to the
 * start of the buffer data.
 */
static inline int
__ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh,
			 const char *function, unsigned int line)
{
	return check_xattrs(inode, bh, BFIRST(bh), bh->b_data + bh->b_size,
			    bh->b_data, function, line);
}

/* Wrapper that records the calling function and line for error reports. */
#define ext4_xattr_check_block(inode, bh) \
	__ext4_xattr_check_block((inode), (bh),  __func__, __LINE__)

307 static inline int 308 __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, 309 void *end, const char *function, unsigned int line) 310 { 311 return check_xattrs(inode, NULL, IFIRST(header), end, IFIRST(header), 312 function, line); 313 } 314 315 #define xattr_check_inode(inode, header, end) \ 316 __xattr_check_inode((inode), (header), (end), __func__, __LINE__) 317 318 static int 319 xattr_find_entry(struct inode *inode, struct ext4_xattr_entry **pentry, 320 void *end, int name_index, const char *name, int sorted) 321 { 322 struct ext4_xattr_entry *entry, *next; 323 size_t name_len; 324 int cmp = 1; 325 326 if (name == NULL) 327 return -EINVAL; 328 name_len = strlen(name); 329 for (entry = *pentry; !IS_LAST_ENTRY(entry); entry = next) { 330 next = EXT4_XATTR_NEXT(entry); 331 if ((void *) next >= end) { 332 EXT4_ERROR_INODE(inode, "corrupted xattr entries"); 333 return -EFSCORRUPTED; 334 } 335 cmp = name_index - entry->e_name_index; 336 if (!cmp) 337 cmp = name_len - entry->e_name_len; 338 if (!cmp) 339 cmp = memcmp(name, entry->e_name, name_len); 340 if (cmp <= 0 && (sorted || cmp == 0)) 341 break; 342 } 343 *pentry = entry; 344 return cmp ? 
-ENODATA : 0; 345 } 346 347 static u32 348 ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size) 349 { 350 return ext4_chksum(sbi, sbi->s_csum_seed, buffer, size); 351 } 352 353 static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode) 354 { 355 return ((u64)ea_inode->i_ctime.tv_sec << 32) | 356 (u32) inode_peek_iversion_raw(ea_inode); 357 } 358 359 static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count) 360 { 361 ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32); 362 inode_set_iversion_raw(ea_inode, ref_count & 0xffffffff); 363 } 364 365 static u32 ext4_xattr_inode_get_hash(struct inode *ea_inode) 366 { 367 return (u32)ea_inode->i_atime.tv_sec; 368 } 369 370 static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash) 371 { 372 ea_inode->i_atime.tv_sec = hash; 373 } 374 375 /* 376 * Read the EA value from an inode. 377 */ 378 static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size) 379 { 380 int blocksize = 1 << ea_inode->i_blkbits; 381 int bh_count = (size + blocksize - 1) >> ea_inode->i_blkbits; 382 int tail_size = (size % blocksize) ?: blocksize; 383 struct buffer_head *bhs_inline[8]; 384 struct buffer_head **bhs = bhs_inline; 385 int i, ret; 386 387 if (bh_count > ARRAY_SIZE(bhs_inline)) { 388 bhs = kmalloc_array(bh_count, sizeof(*bhs), GFP_NOFS); 389 if (!bhs) 390 return -ENOMEM; 391 } 392 393 ret = ext4_bread_batch(ea_inode, 0 /* block */, bh_count, 394 true /* wait */, bhs); 395 if (ret) 396 goto free_bhs; 397 398 for (i = 0; i < bh_count; i++) { 399 /* There shouldn't be any holes in ea_inode. */ 400 if (!bhs[i]) { 401 ret = -EFSCORRUPTED; 402 goto put_bhs; 403 } 404 memcpy((char *)buf + blocksize * i, bhs[i]->b_data, 405 i < bh_count - 1 ? 
blocksize : tail_size); 406 } 407 ret = 0; 408 put_bhs: 409 for (i = 0; i < bh_count; i++) 410 brelse(bhs[i]); 411 free_bhs: 412 if (bhs != bhs_inline) 413 kfree(bhs); 414 return ret; 415 } 416 417 #define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec) 418 419 static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, 420 u32 ea_inode_hash, struct inode **ea_inode) 421 { 422 struct inode *inode; 423 int err; 424 425 /* 426 * We have to check for this corruption early as otherwise 427 * iget_locked() could wait indefinitely for the state of our 428 * parent inode. 429 */ 430 if (parent->i_ino == ea_ino) { 431 ext4_error(parent->i_sb, 432 "Parent and EA inode have the same ino %lu", ea_ino); 433 return -EFSCORRUPTED; 434 } 435 436 inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL); 437 if (IS_ERR(inode)) { 438 err = PTR_ERR(inode); 439 ext4_error(parent->i_sb, 440 "error while reading EA inode %lu err=%d", ea_ino, 441 err); 442 return err; 443 } 444 445 if (is_bad_inode(inode)) { 446 ext4_error(parent->i_sb, 447 "error while reading EA inode %lu is_bad_inode", 448 ea_ino); 449 err = -EIO; 450 goto error; 451 } 452 453 if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { 454 ext4_error(parent->i_sb, 455 "EA inode %lu does not have EXT4_EA_INODE_FL flag", 456 ea_ino); 457 err = -EINVAL; 458 goto error; 459 } 460 461 ext4_xattr_inode_set_class(inode); 462 463 /* 464 * Check whether this is an old Lustre-style xattr inode. Lustre 465 * implementation does not have hash validation, rather it has a 466 * backpointer from ea_inode to the parent inode. 
467 */ 468 if (ea_inode_hash != ext4_xattr_inode_get_hash(inode) && 469 EXT4_XATTR_INODE_GET_PARENT(inode) == parent->i_ino && 470 inode->i_generation == parent->i_generation) { 471 ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE); 472 ext4_xattr_inode_set_ref(inode, 1); 473 } else { 474 inode_lock(inode); 475 inode->i_flags |= S_NOQUOTA; 476 inode_unlock(inode); 477 } 478 479 *ea_inode = inode; 480 return 0; 481 error: 482 iput(inode); 483 return err; 484 } 485 486 /* Remove entry from mbcache when EA inode is getting evicted */ 487 void ext4_evict_ea_inode(struct inode *inode) 488 { 489 struct mb_cache_entry *oe; 490 491 if (!EA_INODE_CACHE(inode)) 492 return; 493 /* Wait for entry to get unused so that we can remove it */ 494 while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode), 495 ext4_xattr_inode_get_hash(inode), inode->i_ino))) { 496 mb_cache_entry_wait_unused(oe); 497 mb_cache_entry_put(EA_INODE_CACHE(inode), oe); 498 } 499 } 500 501 static int 502 ext4_xattr_inode_verify_hashes(struct inode *ea_inode, 503 struct ext4_xattr_entry *entry, void *buffer, 504 size_t size) 505 { 506 u32 hash; 507 508 /* Verify stored hash matches calculated hash. */ 509 hash = ext4_xattr_inode_hash(EXT4_SB(ea_inode->i_sb), buffer, size); 510 if (hash != ext4_xattr_inode_get_hash(ea_inode)) 511 return -EFSCORRUPTED; 512 513 if (entry) { 514 __le32 e_hash, tmp_data; 515 516 /* Verify entry hash. */ 517 tmp_data = cpu_to_le32(hash); 518 e_hash = ext4_xattr_hash_entry(entry->e_name, entry->e_name_len, 519 &tmp_data, 1); 520 /* All good? */ 521 if (e_hash == entry->e_hash) 522 return 0; 523 524 /* 525 * Not good. Maybe the entry hash was calculated 526 * using the buggy signed char version? 
527 */ 528 e_hash = ext4_xattr_hash_entry_signed(entry->e_name, entry->e_name_len, 529 &tmp_data, 1); 530 /* Still no match - bad */ 531 if (e_hash != entry->e_hash) 532 return -EFSCORRUPTED; 533 534 /* Let people know about old hash */ 535 pr_warn_once("ext4: filesystem with signed xattr name hash"); 536 } 537 return 0; 538 } 539 540 /* 541 * Read xattr value from the EA inode. 542 */ 543 static int 544 ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry, 545 void *buffer, size_t size) 546 { 547 struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode); 548 struct inode *ea_inode; 549 int err; 550 551 err = ext4_xattr_inode_iget(inode, le32_to_cpu(entry->e_value_inum), 552 le32_to_cpu(entry->e_hash), &ea_inode); 553 if (err) { 554 ea_inode = NULL; 555 goto out; 556 } 557 558 if (i_size_read(ea_inode) != size) { 559 ext4_warning_inode(ea_inode, 560 "ea_inode file size=%llu entry size=%zu", 561 i_size_read(ea_inode), size); 562 err = -EFSCORRUPTED; 563 goto out; 564 } 565 566 err = ext4_xattr_inode_read(ea_inode, buffer, size); 567 if (err) 568 goto out; 569 570 if (!ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE)) { 571 err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer, 572 size); 573 if (err) { 574 ext4_warning_inode(ea_inode, 575 "EA inode hash validation failed"); 576 goto out; 577 } 578 579 if (ea_inode_cache) 580 mb_cache_entry_create(ea_inode_cache, GFP_NOFS, 581 ext4_xattr_inode_get_hash(ea_inode), 582 ea_inode->i_ino, true /* reusable */); 583 } 584 out: 585 iput(ea_inode); 586 return err; 587 } 588 589 static int 590 ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, 591 void *buffer, size_t buffer_size) 592 { 593 struct buffer_head *bh = NULL; 594 struct ext4_xattr_entry *entry; 595 size_t size; 596 void *end; 597 int error; 598 struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); 599 600 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", 601 name_index, name, buffer, 
(long)buffer_size); 602 603 if (!EXT4_I(inode)->i_file_acl) 604 return -ENODATA; 605 ea_idebug(inode, "reading block %llu", 606 (unsigned long long)EXT4_I(inode)->i_file_acl); 607 bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); 608 if (IS_ERR(bh)) 609 return PTR_ERR(bh); 610 ea_bdebug(bh, "b_count=%d, refcount=%d", 611 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 612 error = ext4_xattr_check_block(inode, bh); 613 if (error) 614 goto cleanup; 615 ext4_xattr_block_cache_insert(ea_block_cache, bh); 616 entry = BFIRST(bh); 617 end = bh->b_data + bh->b_size; 618 error = xattr_find_entry(inode, &entry, end, name_index, name, 1); 619 if (error) 620 goto cleanup; 621 size = le32_to_cpu(entry->e_value_size); 622 error = -ERANGE; 623 if (unlikely(size > EXT4_XATTR_SIZE_MAX)) 624 goto cleanup; 625 if (buffer) { 626 if (size > buffer_size) 627 goto cleanup; 628 if (entry->e_value_inum) { 629 error = ext4_xattr_inode_get(inode, entry, buffer, 630 size); 631 if (error) 632 goto cleanup; 633 } else { 634 u16 offset = le16_to_cpu(entry->e_value_offs); 635 void *p = bh->b_data + offset; 636 637 if (unlikely(p + size > end)) 638 goto cleanup; 639 memcpy(buffer, p, size); 640 } 641 } 642 error = size; 643 644 cleanup: 645 brelse(bh); 646 return error; 647 } 648 649 int 650 ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, 651 void *buffer, size_t buffer_size) 652 { 653 struct ext4_xattr_ibody_header *header; 654 struct ext4_xattr_entry *entry; 655 struct ext4_inode *raw_inode; 656 struct ext4_iloc iloc; 657 size_t size; 658 void *end; 659 int error; 660 661 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) 662 return -ENODATA; 663 error = ext4_get_inode_loc(inode, &iloc); 664 if (error) 665 return error; 666 raw_inode = ext4_raw_inode(&iloc); 667 header = IHDR(inode, raw_inode); 668 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 669 error = xattr_check_inode(inode, header, end); 670 if (error) 671 
goto cleanup; 672 entry = IFIRST(header); 673 error = xattr_find_entry(inode, &entry, end, name_index, name, 0); 674 if (error) 675 goto cleanup; 676 size = le32_to_cpu(entry->e_value_size); 677 error = -ERANGE; 678 if (unlikely(size > EXT4_XATTR_SIZE_MAX)) 679 goto cleanup; 680 if (buffer) { 681 if (size > buffer_size) 682 goto cleanup; 683 if (entry->e_value_inum) { 684 error = ext4_xattr_inode_get(inode, entry, buffer, 685 size); 686 if (error) 687 goto cleanup; 688 } else { 689 u16 offset = le16_to_cpu(entry->e_value_offs); 690 void *p = (void *)IFIRST(header) + offset; 691 692 if (unlikely(p + size > end)) 693 goto cleanup; 694 memcpy(buffer, p, size); 695 } 696 } 697 error = size; 698 699 cleanup: 700 brelse(iloc.bh); 701 return error; 702 } 703 704 /* 705 * ext4_xattr_get() 706 * 707 * Copy an extended attribute into the buffer 708 * provided, or compute the buffer size required. 709 * Buffer is NULL to compute the size of the buffer required. 710 * 711 * Returns a negative error number on failure, or the number of bytes 712 * used / required on success. 
713 */ 714 int 715 ext4_xattr_get(struct inode *inode, int name_index, const char *name, 716 void *buffer, size_t buffer_size) 717 { 718 int error; 719 720 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 721 return -EIO; 722 723 if (strlen(name) > 255) 724 return -ERANGE; 725 726 down_read(&EXT4_I(inode)->xattr_sem); 727 error = ext4_xattr_ibody_get(inode, name_index, name, buffer, 728 buffer_size); 729 if (error == -ENODATA) 730 error = ext4_xattr_block_get(inode, name_index, name, buffer, 731 buffer_size); 732 up_read(&EXT4_I(inode)->xattr_sem); 733 return error; 734 } 735 736 static int 737 ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry, 738 char *buffer, size_t buffer_size) 739 { 740 size_t rest = buffer_size; 741 742 for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { 743 const struct xattr_handler *handler = 744 ext4_xattr_handler(entry->e_name_index); 745 746 if (handler && (!handler->list || handler->list(dentry))) { 747 const char *prefix = handler->prefix ?: handler->name; 748 size_t prefix_len = strlen(prefix); 749 size_t size = prefix_len + entry->e_name_len + 1; 750 751 if (buffer) { 752 if (size > rest) 753 return -ERANGE; 754 memcpy(buffer, prefix, prefix_len); 755 buffer += prefix_len; 756 memcpy(buffer, entry->e_name, entry->e_name_len); 757 buffer += entry->e_name_len; 758 *buffer++ = 0; 759 } 760 rest -= size; 761 } 762 } 763 return buffer_size - rest; /* total size */ 764 } 765 766 static int 767 ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) 768 { 769 struct inode *inode = d_inode(dentry); 770 struct buffer_head *bh = NULL; 771 int error; 772 773 ea_idebug(inode, "buffer=%p, buffer_size=%ld", 774 buffer, (long)buffer_size); 775 776 if (!EXT4_I(inode)->i_file_acl) 777 return 0; 778 ea_idebug(inode, "reading block %llu", 779 (unsigned long long)EXT4_I(inode)->i_file_acl); 780 bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); 781 if 
(IS_ERR(bh)) 782 return PTR_ERR(bh); 783 ea_bdebug(bh, "b_count=%d, refcount=%d", 784 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 785 error = ext4_xattr_check_block(inode, bh); 786 if (error) 787 goto cleanup; 788 ext4_xattr_block_cache_insert(EA_BLOCK_CACHE(inode), bh); 789 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, 790 buffer_size); 791 cleanup: 792 brelse(bh); 793 return error; 794 } 795 796 static int 797 ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) 798 { 799 struct inode *inode = d_inode(dentry); 800 struct ext4_xattr_ibody_header *header; 801 struct ext4_inode *raw_inode; 802 struct ext4_iloc iloc; 803 void *end; 804 int error; 805 806 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) 807 return 0; 808 error = ext4_get_inode_loc(inode, &iloc); 809 if (error) 810 return error; 811 raw_inode = ext4_raw_inode(&iloc); 812 header = IHDR(inode, raw_inode); 813 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 814 error = xattr_check_inode(inode, header, end); 815 if (error) 816 goto cleanup; 817 error = ext4_xattr_list_entries(dentry, IFIRST(header), 818 buffer, buffer_size); 819 820 cleanup: 821 brelse(iloc.bh); 822 return error; 823 } 824 825 /* 826 * Inode operation listxattr() 827 * 828 * d_inode(dentry)->i_rwsem: don't care 829 * 830 * Copy a list of attribute names into the buffer 831 * provided, or compute the buffer size required. 832 * Buffer is NULL to compute the size of the buffer required. 833 * 834 * Returns a negative error number on failure, or the number of bytes 835 * used / required on success. 
836 */ 837 ssize_t 838 ext4_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) 839 { 840 int ret, ret2; 841 842 down_read(&EXT4_I(d_inode(dentry))->xattr_sem); 843 ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size); 844 if (ret < 0) 845 goto errout; 846 if (buffer) { 847 buffer += ret; 848 buffer_size -= ret; 849 } 850 ret = ext4_xattr_block_list(dentry, buffer, buffer_size); 851 if (ret < 0) 852 goto errout; 853 ret += ret2; 854 errout: 855 up_read(&EXT4_I(d_inode(dentry))->xattr_sem); 856 return ret; 857 } 858 859 /* 860 * If the EXT4_FEATURE_COMPAT_EXT_ATTR feature of this file system is 861 * not set, set it. 862 */ 863 static void ext4_xattr_update_super_block(handle_t *handle, 864 struct super_block *sb) 865 { 866 if (ext4_has_feature_xattr(sb)) 867 return; 868 869 BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); 870 if (ext4_journal_get_write_access(handle, sb, EXT4_SB(sb)->s_sbh, 871 EXT4_JTR_NONE) == 0) { 872 lock_buffer(EXT4_SB(sb)->s_sbh); 873 ext4_set_feature_xattr(sb); 874 ext4_superblock_csum_set(sb); 875 unlock_buffer(EXT4_SB(sb)->s_sbh); 876 ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); 877 } 878 } 879 880 int ext4_get_inode_usage(struct inode *inode, qsize_t *usage) 881 { 882 struct ext4_iloc iloc = { .bh = NULL }; 883 struct buffer_head *bh = NULL; 884 struct ext4_inode *raw_inode; 885 struct ext4_xattr_ibody_header *header; 886 struct ext4_xattr_entry *entry; 887 qsize_t ea_inode_refs = 0; 888 void *end; 889 int ret; 890 891 lockdep_assert_held_read(&EXT4_I(inode)->xattr_sem); 892 893 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { 894 ret = ext4_get_inode_loc(inode, &iloc); 895 if (ret) 896 goto out; 897 raw_inode = ext4_raw_inode(&iloc); 898 header = IHDR(inode, raw_inode); 899 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 900 ret = xattr_check_inode(inode, header, end); 901 if (ret) 902 goto out; 903 904 for (entry = IFIRST(header); !IS_LAST_ENTRY(entry); 905 entry = 
EXT4_XATTR_NEXT(entry)) 906 if (entry->e_value_inum) 907 ea_inode_refs++; 908 } 909 910 if (EXT4_I(inode)->i_file_acl) { 911 bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); 912 if (IS_ERR(bh)) { 913 ret = PTR_ERR(bh); 914 bh = NULL; 915 goto out; 916 } 917 918 ret = ext4_xattr_check_block(inode, bh); 919 if (ret) 920 goto out; 921 922 for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry); 923 entry = EXT4_XATTR_NEXT(entry)) 924 if (entry->e_value_inum) 925 ea_inode_refs++; 926 } 927 *usage = ea_inode_refs + 1; 928 ret = 0; 929 out: 930 brelse(iloc.bh); 931 brelse(bh); 932 return ret; 933 } 934 935 static inline size_t round_up_cluster(struct inode *inode, size_t length) 936 { 937 struct super_block *sb = inode->i_sb; 938 size_t cluster_size = 1 << (EXT4_SB(sb)->s_cluster_bits + 939 inode->i_blkbits); 940 size_t mask = ~(cluster_size - 1); 941 942 return (length + cluster_size - 1) & mask; 943 } 944 945 static int ext4_xattr_inode_alloc_quota(struct inode *inode, size_t len) 946 { 947 int err; 948 949 err = dquot_alloc_inode(inode); 950 if (err) 951 return err; 952 err = dquot_alloc_space_nodirty(inode, round_up_cluster(inode, len)); 953 if (err) 954 dquot_free_inode(inode); 955 return err; 956 } 957 958 static void ext4_xattr_inode_free_quota(struct inode *parent, 959 struct inode *ea_inode, 960 size_t len) 961 { 962 if (ea_inode && 963 ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE)) 964 return; 965 dquot_free_space_nodirty(parent, round_up_cluster(parent, len)); 966 dquot_free_inode(parent); 967 } 968 969 int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode, 970 struct buffer_head *block_bh, size_t value_len, 971 bool is_create) 972 { 973 int credits; 974 int blocks; 975 976 /* 977 * 1) Owner inode update 978 * 2) Ref count update on old xattr block 979 * 3) new xattr block 980 * 4) block bitmap update for new xattr block 981 * 5) group descriptor for new xattr block 982 * 6) block bitmap update for old xattr block 
983 * 7) group descriptor for old block 984 * 985 * 6 & 7 can happen if we have two racing threads T_a and T_b 986 * which are each trying to set an xattr on inodes I_a and I_b 987 * which were both initially sharing an xattr block. 988 */ 989 credits = 7; 990 991 /* Quota updates. */ 992 credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(sb); 993 994 /* 995 * In case of inline data, we may push out the data to a block, 996 * so we need to reserve credits for this eventuality 997 */ 998 if (inode && ext4_has_inline_data(inode)) 999 credits += ext4_writepage_trans_blocks(inode) + 1; 1000 1001 /* We are done if ea_inode feature is not enabled. */ 1002 if (!ext4_has_feature_ea_inode(sb)) 1003 return credits; 1004 1005 /* New ea_inode, inode map, block bitmap, group descriptor. */ 1006 credits += 4; 1007 1008 /* Data blocks. */ 1009 blocks = (value_len + sb->s_blocksize - 1) >> sb->s_blocksize_bits; 1010 1011 /* Indirection block or one level of extent tree. */ 1012 blocks += 1; 1013 1014 /* Block bitmap and group descriptor updates for each block. */ 1015 credits += blocks * 2; 1016 1017 /* Blocks themselves. */ 1018 credits += blocks; 1019 1020 if (!is_create) { 1021 /* Dereference ea_inode holding old xattr value. 1022 * Old ea_inode, inode map, block bitmap, group descriptor. 1023 */ 1024 credits += 4; 1025 1026 /* Data blocks for old ea_inode. */ 1027 blocks = XATTR_SIZE_MAX >> sb->s_blocksize_bits; 1028 1029 /* Indirection block or one level of extent tree for old 1030 * ea_inode. 1031 */ 1032 blocks += 1; 1033 1034 /* Block bitmap and group descriptor updates for each block. */ 1035 credits += blocks * 2; 1036 } 1037 1038 /* We may need to clone the existing xattr block in which case we need 1039 * to increment ref counts for existing ea_inodes referenced by it. 
1040 */ 1041 if (block_bh) { 1042 struct ext4_xattr_entry *entry = BFIRST(block_bh); 1043 1044 for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) 1045 if (entry->e_value_inum) 1046 /* Ref count update on ea_inode. */ 1047 credits += 1; 1048 } 1049 return credits; 1050 } 1051 1052 static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, 1053 int ref_change) 1054 { 1055 struct ext4_iloc iloc; 1056 s64 ref_count; 1057 int ret; 1058 1059 inode_lock(ea_inode); 1060 1061 ret = ext4_reserve_inode_write(handle, ea_inode, &iloc); 1062 if (ret) 1063 goto out; 1064 1065 ref_count = ext4_xattr_inode_get_ref(ea_inode); 1066 ref_count += ref_change; 1067 ext4_xattr_inode_set_ref(ea_inode, ref_count); 1068 1069 if (ref_change > 0) { 1070 WARN_ONCE(ref_count <= 0, "EA inode %lu ref_count=%lld", 1071 ea_inode->i_ino, ref_count); 1072 1073 if (ref_count == 1) { 1074 WARN_ONCE(ea_inode->i_nlink, "EA inode %lu i_nlink=%u", 1075 ea_inode->i_ino, ea_inode->i_nlink); 1076 1077 set_nlink(ea_inode, 1); 1078 ext4_orphan_del(handle, ea_inode); 1079 } 1080 } else { 1081 WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld", 1082 ea_inode->i_ino, ref_count); 1083 1084 if (ref_count == 0) { 1085 WARN_ONCE(ea_inode->i_nlink != 1, 1086 "EA inode %lu i_nlink=%u", 1087 ea_inode->i_ino, ea_inode->i_nlink); 1088 1089 clear_nlink(ea_inode); 1090 ext4_orphan_add(handle, ea_inode); 1091 } 1092 } 1093 1094 ret = ext4_mark_iloc_dirty(handle, ea_inode, &iloc); 1095 if (ret) 1096 ext4_warning_inode(ea_inode, 1097 "ext4_mark_iloc_dirty() failed ret=%d", ret); 1098 out: 1099 inode_unlock(ea_inode); 1100 return ret; 1101 } 1102 1103 static int ext4_xattr_inode_inc_ref(handle_t *handle, struct inode *ea_inode) 1104 { 1105 return ext4_xattr_inode_update_ref(handle, ea_inode, 1); 1106 } 1107 1108 static int ext4_xattr_inode_dec_ref(handle_t *handle, struct inode *ea_inode) 1109 { 1110 return ext4_xattr_inode_update_ref(handle, ea_inode, -1); 1111 } 1112 1113 static int 
ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent, 1114 struct ext4_xattr_entry *first) 1115 { 1116 struct inode *ea_inode; 1117 struct ext4_xattr_entry *entry; 1118 struct ext4_xattr_entry *failed_entry; 1119 unsigned int ea_ino; 1120 int err, saved_err; 1121 1122 for (entry = first; !IS_LAST_ENTRY(entry); 1123 entry = EXT4_XATTR_NEXT(entry)) { 1124 if (!entry->e_value_inum) 1125 continue; 1126 ea_ino = le32_to_cpu(entry->e_value_inum); 1127 err = ext4_xattr_inode_iget(parent, ea_ino, 1128 le32_to_cpu(entry->e_hash), 1129 &ea_inode); 1130 if (err) 1131 goto cleanup; 1132 err = ext4_xattr_inode_inc_ref(handle, ea_inode); 1133 if (err) { 1134 ext4_warning_inode(ea_inode, "inc ref error %d", err); 1135 iput(ea_inode); 1136 goto cleanup; 1137 } 1138 iput(ea_inode); 1139 } 1140 return 0; 1141 1142 cleanup: 1143 saved_err = err; 1144 failed_entry = entry; 1145 1146 for (entry = first; entry != failed_entry; 1147 entry = EXT4_XATTR_NEXT(entry)) { 1148 if (!entry->e_value_inum) 1149 continue; 1150 ea_ino = le32_to_cpu(entry->e_value_inum); 1151 err = ext4_xattr_inode_iget(parent, ea_ino, 1152 le32_to_cpu(entry->e_hash), 1153 &ea_inode); 1154 if (err) { 1155 ext4_warning(parent->i_sb, 1156 "cleanup ea_ino %u iget error %d", ea_ino, 1157 err); 1158 continue; 1159 } 1160 err = ext4_xattr_inode_dec_ref(handle, ea_inode); 1161 if (err) 1162 ext4_warning_inode(ea_inode, "cleanup dec ref error %d", 1163 err); 1164 iput(ea_inode); 1165 } 1166 return saved_err; 1167 } 1168 1169 static int ext4_xattr_restart_fn(handle_t *handle, struct inode *inode, 1170 struct buffer_head *bh, bool block_csum, bool dirty) 1171 { 1172 int error; 1173 1174 if (bh && dirty) { 1175 if (block_csum) 1176 ext4_xattr_block_csum_set(inode, bh); 1177 error = ext4_handle_dirty_metadata(handle, NULL, bh); 1178 if (error) { 1179 ext4_warning(inode->i_sb, "Handle metadata (error %d)", 1180 error); 1181 return error; 1182 } 1183 } 1184 return 0; 1185 } 1186 1187 static void 1188 
ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
			     struct buffer_head *bh,
			     struct ext4_xattr_entry *first, bool block_csum,
			     struct ext4_xattr_inode_array **ea_inode_array,
			     int extra_credits, bool skip_quota)
{
	/*
	 * Walk the entry list starting at @first and, for every entry that
	 * references an EA inode, drop one reference on that inode and clear
	 * the entry's e_value_inum / e_value_size.  Failures are logged and
	 * the entry is skipped; nothing is returned to the caller.
	 *
	 * Each EA inode that is successfully appended to @ea_inode_array is
	 * not iput() here; presumably the caller releases the array later
	 * (NOTE(review): confirm against ext4_xattr_inode_array_free()).
	 */
	struct inode *ea_inode;
	struct ext4_xattr_entry *entry;
	bool dirty = false;
	unsigned int ea_ino;
	int err;
	int credits;

	/* One credit for dec ref on ea_inode, one for orphan list addition, */
	credits = 2 + extra_credits;

	for (entry = first; !IS_LAST_ENTRY(entry);
	     entry = EXT4_XATTR_NEXT(entry)) {
		if (!entry->e_value_inum)
			continue;
		ea_ino = le32_to_cpu(entry->e_value_inum);
		err = ext4_xattr_inode_iget(parent, ea_ino,
					    le32_to_cpu(entry->e_hash),
					    &ea_inode);
		if (err)
			continue;

		err = ext4_expand_inode_array(ea_inode_array, ea_inode);
		if (err) {
			ext4_warning_inode(ea_inode,
					   "Expand inode array err=%d", err);
			iput(ea_inode);
			continue;
		}

		/*
		 * The last argument is evaluated by the macro only if it has
		 * to restart the handle (presumably; see the definition of
		 * ext4_journal_ensure_credits_fn).  A positive return value
		 * is handled below by re-acquiring write access to @bh.
		 */
		err = ext4_journal_ensure_credits_fn(handle, credits, credits,
			ext4_free_metadata_revoke_credits(parent->i_sb, 1),
			ext4_xattr_restart_fn(handle, parent, bh, block_csum,
					      dirty));
		if (err < 0) {
			ext4_warning_inode(ea_inode, "Ensure credits err=%d",
					   err);
			continue;
		}
		if (err > 0) {
			err = ext4_journal_get_write_access(handle,
					parent->i_sb, bh, EXT4_JTR_NONE);
			if (err) {
				ext4_warning_inode(ea_inode,
						"Re-get write access err=%d",
						err);
				continue;
			}
		}

		err = ext4_xattr_inode_dec_ref(handle, ea_inode);
		if (err) {
			ext4_warning_inode(ea_inode, "ea_inode dec ref err=%d",
					   err);
			continue;
		}

		if (!skip_quota)
			ext4_xattr_inode_free_quota(parent, ea_inode,
					      le32_to_cpu(entry->e_value_size));

		/*
		 * Forget about ea_inode within the same transaction that
		 * decrements the ref count. This avoids duplicate decrements in
		 * case the rest of the work spills over to subsequent
		 * transactions.
		 */
		entry->e_value_inum = 0;
		entry->e_value_size = 0;

		dirty = true;
	}

	if (dirty) {
		/*
		 * Note that we are deliberately skipping csum calculation for
		 * the final update because we do not expect any journal
		 * restarts until xattr block is freed.
		 */

		err = ext4_handle_dirty_metadata(handle, NULL, bh);
		if (err)
			ext4_warning_inode(parent,
					   "handle dirty metadata err=%d", err);
	}
}

/*
 * Release the xattr block BH: If the reference count is > 1, decrement it;
 * otherwise free the block.
 *
 * When the block is freed and the ea_inode feature is enabled, references
 * held by the block's entries on EA inodes are dropped as well; those inodes
 * are collected in @ea_inode_array.  Errors are reported through
 * ext4_std_error(); nothing is returned.
 */
static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
			 struct buffer_head *bh,
			 struct ext4_xattr_inode_array **ea_inode_array,
			 int extra_credits)
{
	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
	u32 hash, ref;
	int error = 0;

	BUFFER_TRACE(bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, inode->i_sb, bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

retry_ref:
	lock_buffer(bh);
	hash = le32_to_cpu(BHDR(bh)->h_hash);
	ref = le32_to_cpu(BHDR(bh)->h_refcount);
	if (ref == 1) {
		ea_bdebug(bh, "refcount now=0; freeing");
		/*
		 * This must happen under buffer lock for
		 * ext4_xattr_block_set() to reliably detect freed block
		 */
		if (ea_block_cache) {
			struct mb_cache_entry *oe;

			oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
							  bh->b_blocknr);
			if (oe) {
				/*
				 * The cache entry is still in use: drop the
				 * buffer lock, wait for the entry to become
				 * unused and re-read the refcount.
				 */
				unlock_buffer(bh);
				mb_cache_entry_wait_unused(oe);
				mb_cache_entry_put(ea_block_cache, oe);
				goto retry_ref;
			}
		}
		get_bh(bh);
		unlock_buffer(bh);

		if (ext4_has_feature_ea_inode(inode->i_sb))
			ext4_xattr_inode_dec_ref_all(handle, inode, bh,
						     BFIRST(bh),
						     true /* block_csum */,
						     ea_inode_array,
						     extra_credits,
						     true /* skip_quota */);
		ext4_free_blocks(handle, inode, bh, 0, 1,
				 EXT4_FREE_BLOCKS_METADATA |
				 EXT4_FREE_BLOCKS_FORGET);
	} else {
		ref--;
		BHDR(bh)->h_refcount = cpu_to_le32(ref);
		/*
		 * The count just dropped below EXT4_XATTR_REFCOUNT_MAX, so
		 * the block can accept new sharers again: flag its cache
		 * entry as reusable.
		 */
		if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
			struct mb_cache_entry *ce;

			if (ea_block_cache) {
				ce = mb_cache_entry_get(ea_block_cache, hash,
							bh->b_blocknr);
				if (ce) {
					set_bit(MBE_REUSABLE_B, &ce->e_flags);
					mb_cache_entry_put(ea_block_cache, ce);
				}
			}
		}

		ext4_xattr_block_csum_set(inode, bh);
		/*
		 * Beware of this ugliness: Releasing of xattr block references
		 * from different inodes can race and so we have to protect
		 * from a race where someone else frees the block (and releases
		 * its journal_head) before we are done dirtying the buffer. In
		 * nojournal mode this race is harmless and we actually cannot
		 * call ext4_handle_dirty_metadata() with locked buffer as
		 * that function can call sync_dirty_buffer() so for that case
		 * we handle the dirtying after unlocking the buffer.
		 */
		if (ext4_handle_valid(handle))
			error = ext4_handle_dirty_metadata(handle, inode, bh);
		unlock_buffer(bh);
		if (!ext4_handle_valid(handle))
			error = ext4_handle_dirty_metadata(handle, inode, bh);
		if (IS_SYNC(inode))
			ext4_handle_sync(handle);
		/* This inode no longer uses the shared block: return quota. */
		dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
		ea_bdebug(bh, "refcount now=%d; releasing",
			  le32_to_cpu(BHDR(bh)->h_refcount));
	}
out:
	ext4_std_error(inode->i_sb, error);
	return;
}

/*
 * Find the available free space for EAs. This also returns the total number of
 * bytes used by EA entries.
1381 */ 1382 static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last, 1383 size_t *min_offs, void *base, int *total) 1384 { 1385 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 1386 if (!last->e_value_inum && last->e_value_size) { 1387 size_t offs = le16_to_cpu(last->e_value_offs); 1388 if (offs < *min_offs) 1389 *min_offs = offs; 1390 } 1391 if (total) 1392 *total += EXT4_XATTR_LEN(last->e_name_len); 1393 } 1394 return (*min_offs - ((void *)last - base) - sizeof(__u32)); 1395 } 1396 1397 /* 1398 * Write the value of the EA in an inode. 1399 */ 1400 static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode, 1401 const void *buf, int bufsize) 1402 { 1403 struct buffer_head *bh = NULL; 1404 unsigned long block = 0; 1405 int blocksize = ea_inode->i_sb->s_blocksize; 1406 int max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits; 1407 int csize, wsize = 0; 1408 int ret = 0, ret2 = 0; 1409 int retries = 0; 1410 1411 retry: 1412 while (ret >= 0 && ret < max_blocks) { 1413 struct ext4_map_blocks map; 1414 map.m_lblk = block += ret; 1415 map.m_len = max_blocks -= ret; 1416 1417 ret = ext4_map_blocks(handle, ea_inode, &map, 1418 EXT4_GET_BLOCKS_CREATE); 1419 if (ret <= 0) { 1420 ext4_mark_inode_dirty(handle, ea_inode); 1421 if (ret == -ENOSPC && 1422 ext4_should_retry_alloc(ea_inode->i_sb, &retries)) { 1423 ret = 0; 1424 goto retry; 1425 } 1426 break; 1427 } 1428 } 1429 1430 if (ret < 0) 1431 return ret; 1432 1433 block = 0; 1434 while (wsize < bufsize) { 1435 brelse(bh); 1436 csize = (bufsize - wsize) > blocksize ? 
blocksize : 1437 bufsize - wsize; 1438 bh = ext4_getblk(handle, ea_inode, block, 0); 1439 if (IS_ERR(bh)) 1440 return PTR_ERR(bh); 1441 if (!bh) { 1442 WARN_ON_ONCE(1); 1443 EXT4_ERROR_INODE(ea_inode, 1444 "ext4_getblk() return bh = NULL"); 1445 return -EFSCORRUPTED; 1446 } 1447 ret = ext4_journal_get_write_access(handle, ea_inode->i_sb, bh, 1448 EXT4_JTR_NONE); 1449 if (ret) 1450 goto out; 1451 1452 memcpy(bh->b_data, buf, csize); 1453 set_buffer_uptodate(bh); 1454 ext4_handle_dirty_metadata(handle, ea_inode, bh); 1455 1456 buf += csize; 1457 wsize += csize; 1458 block += 1; 1459 } 1460 1461 inode_lock(ea_inode); 1462 i_size_write(ea_inode, wsize); 1463 ext4_update_i_disksize(ea_inode, wsize); 1464 inode_unlock(ea_inode); 1465 1466 ret2 = ext4_mark_inode_dirty(handle, ea_inode); 1467 if (unlikely(ret2 && !ret)) 1468 ret = ret2; 1469 1470 out: 1471 brelse(bh); 1472 1473 return ret; 1474 } 1475 1476 /* 1477 * Create an inode to store the value of a large EA. 1478 */ 1479 static struct inode *ext4_xattr_inode_create(handle_t *handle, 1480 struct inode *inode, u32 hash) 1481 { 1482 struct inode *ea_inode = NULL; 1483 uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) }; 1484 int err; 1485 1486 if (inode->i_sb->s_root == NULL) { 1487 ext4_warning(inode->i_sb, 1488 "refuse to create EA inode when umounting"); 1489 WARN_ON(1); 1490 return ERR_PTR(-EINVAL); 1491 } 1492 1493 /* 1494 * Let the next inode be the goal, so we try and allocate the EA inode 1495 * in the same group, or nearby one. 
1496 */ 1497 ea_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, 1498 S_IFREG | 0600, NULL, inode->i_ino + 1, owner, 1499 EXT4_EA_INODE_FL); 1500 if (!IS_ERR(ea_inode)) { 1501 ea_inode->i_op = &ext4_file_inode_operations; 1502 ea_inode->i_fop = &ext4_file_operations; 1503 ext4_set_aops(ea_inode); 1504 ext4_xattr_inode_set_class(ea_inode); 1505 unlock_new_inode(ea_inode); 1506 ext4_xattr_inode_set_ref(ea_inode, 1); 1507 ext4_xattr_inode_set_hash(ea_inode, hash); 1508 err = ext4_mark_inode_dirty(handle, ea_inode); 1509 if (!err) 1510 err = ext4_inode_attach_jinode(ea_inode); 1511 if (err) { 1512 if (ext4_xattr_inode_dec_ref(handle, ea_inode)) 1513 ext4_warning_inode(ea_inode, 1514 "cleanup dec ref error %d", err); 1515 iput(ea_inode); 1516 return ERR_PTR(err); 1517 } 1518 1519 /* 1520 * Xattr inodes are shared therefore quota charging is performed 1521 * at a higher level. 1522 */ 1523 dquot_free_inode(ea_inode); 1524 dquot_drop(ea_inode); 1525 inode_lock(ea_inode); 1526 ea_inode->i_flags |= S_NOQUOTA; 1527 inode_unlock(ea_inode); 1528 } 1529 1530 return ea_inode; 1531 } 1532 1533 static struct inode * 1534 ext4_xattr_inode_cache_find(struct inode *inode, const void *value, 1535 size_t value_len, u32 hash) 1536 { 1537 struct inode *ea_inode; 1538 struct mb_cache_entry *ce; 1539 struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode); 1540 void *ea_data; 1541 1542 if (!ea_inode_cache) 1543 return NULL; 1544 1545 ce = mb_cache_entry_find_first(ea_inode_cache, hash); 1546 if (!ce) 1547 return NULL; 1548 1549 WARN_ON_ONCE(ext4_handle_valid(journal_current_handle()) && 1550 !(current->flags & PF_MEMALLOC_NOFS)); 1551 1552 ea_data = kvmalloc(value_len, GFP_KERNEL); 1553 if (!ea_data) { 1554 mb_cache_entry_put(ea_inode_cache, ce); 1555 return NULL; 1556 } 1557 1558 while (ce) { 1559 ea_inode = ext4_iget(inode->i_sb, ce->e_value, 1560 EXT4_IGET_NORMAL); 1561 if (!IS_ERR(ea_inode) && 1562 !is_bad_inode(ea_inode) && 1563 (EXT4_I(ea_inode)->i_flags & 
EXT4_EA_INODE_FL) && 1564 i_size_read(ea_inode) == value_len && 1565 !ext4_xattr_inode_read(ea_inode, ea_data, value_len) && 1566 !ext4_xattr_inode_verify_hashes(ea_inode, NULL, ea_data, 1567 value_len) && 1568 !memcmp(value, ea_data, value_len)) { 1569 mb_cache_entry_touch(ea_inode_cache, ce); 1570 mb_cache_entry_put(ea_inode_cache, ce); 1571 kvfree(ea_data); 1572 return ea_inode; 1573 } 1574 1575 if (!IS_ERR(ea_inode)) 1576 iput(ea_inode); 1577 ce = mb_cache_entry_find_next(ea_inode_cache, ce); 1578 } 1579 kvfree(ea_data); 1580 return NULL; 1581 } 1582 1583 /* 1584 * Add value of the EA in an inode. 1585 */ 1586 static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode, 1587 const void *value, size_t value_len, 1588 struct inode **ret_inode) 1589 { 1590 struct inode *ea_inode; 1591 u32 hash; 1592 int err; 1593 1594 hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len); 1595 ea_inode = ext4_xattr_inode_cache_find(inode, value, value_len, hash); 1596 if (ea_inode) { 1597 err = ext4_xattr_inode_inc_ref(handle, ea_inode); 1598 if (err) { 1599 iput(ea_inode); 1600 return err; 1601 } 1602 1603 *ret_inode = ea_inode; 1604 return 0; 1605 } 1606 1607 /* Create an inode for the EA value */ 1608 ea_inode = ext4_xattr_inode_create(handle, inode, hash); 1609 if (IS_ERR(ea_inode)) 1610 return PTR_ERR(ea_inode); 1611 1612 err = ext4_xattr_inode_write(handle, ea_inode, value, value_len); 1613 if (err) { 1614 if (ext4_xattr_inode_dec_ref(handle, ea_inode)) 1615 ext4_warning_inode(ea_inode, "cleanup dec ref error %d", err); 1616 iput(ea_inode); 1617 return err; 1618 } 1619 1620 if (EA_INODE_CACHE(inode)) 1621 mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash, 1622 ea_inode->i_ino, true /* reusable */); 1623 1624 *ret_inode = ea_inode; 1625 return 0; 1626 } 1627 1628 /* 1629 * Reserve min(block_size/8, 1024) bytes for xattr entries/names if ea_inode 1630 * feature is enabled. 
 */
#define EXT4_XATTR_BLOCK_RESERVE(inode)	min(i_blocksize(inode)/8, 1024U)

/*
 * Create, replace or remove the entry described by @i inside the xattr
 * region described by @s.
 *
 * @i:        name, value and in_inode flag of the attribute; a NULL value
 *            removes the entry
 * @s:        search state: base/first/end of the region, the entry position
 *            (here) and whether the name was found (not_found)
 * @handle:   journal handle used for EA inode reference updates
 * @inode:    inode owning the attribute
 * @is_block: true when @s describes an xattr block rather than the in-inode
 *            area; enables the block reserve check and header rehash
 *
 * All steps that can fail (space checks, EA inode lookup/creation, ref count
 * changes) are done before the region is modified.
 *
 * Returns 0 on success, -ENOSPC when the value does not fit, -EFSCORRUPTED
 * when the entry list runs past the end of the region, or an error from the
 * EA inode helpers.
 */
static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
				struct ext4_xattr_search *s,
				handle_t *handle, struct inode *inode,
				bool is_block)
{
	struct ext4_xattr_entry *last, *next;
	struct ext4_xattr_entry *here = s->here;
	size_t min_offs = s->end - s->base, name_len = strlen(i->name);
	int in_inode = i->in_inode;
	struct inode *old_ea_inode = NULL;
	struct inode *new_ea_inode = NULL;
	size_t old_size, new_size;
	int ret;

	/* Space used by old and new values. */
	old_size = (!s->not_found && !here->e_value_inum) ?
			EXT4_XATTR_SIZE(le32_to_cpu(here->e_value_size)) : 0;
	new_size = (i->value && !in_inode) ? EXT4_XATTR_SIZE(i->value_len) : 0;

	/*
	 * Optimization for the simple case when old and new values have the
	 * same padded sizes. Not applicable if external inodes are involved.
	 */
	if (new_size && new_size == old_size) {
		size_t offs = le16_to_cpu(here->e_value_offs);
		void *val = s->base + offs;

		here->e_value_size = cpu_to_le32(i->value_len);
		if (i->value == EXT4_ZERO_XATTR_VALUE) {
			memset(val, 0, new_size);
		} else {
			memcpy(val, i->value, i->value_len);
			/* Clear padding bytes. */
			memset(val + i->value_len, 0, new_size - i->value_len);
		}
		goto update_hash;
	}

	/* Compute min_offs and last. */
	last = s->first;
	for (; !IS_LAST_ENTRY(last); last = next) {
		next = EXT4_XATTR_NEXT(last);
		if ((void *)next >= s->end) {
			EXT4_ERROR_INODE(inode, "corrupted xattr entries");
			ret = -EFSCORRUPTED;
			goto out;
		}
		if (!last->e_value_inum && last->e_value_size) {
			size_t offs = le16_to_cpu(last->e_value_offs);
			if (offs < min_offs)
				min_offs = offs;
		}
	}

	/* Check whether we have enough space. */
	if (i->value) {
		size_t free;

		free = min_offs - ((void *)last - s->base) - sizeof(__u32);
		/* Replacing: the old name and value space is reclaimed. */
		if (!s->not_found)
			free += EXT4_XATTR_LEN(name_len) + old_size;

		if (free < EXT4_XATTR_LEN(name_len) + new_size) {
			ret = -ENOSPC;
			goto out;
		}

		/*
		 * If storing the value in an external inode is an option,
		 * reserve space for xattr entries/names in the external
		 * attribute block so that a long value does not occupy the
		 * whole space and prevent further entries being added.
		 */
		if (ext4_has_feature_ea_inode(inode->i_sb) &&
		    new_size && is_block &&
		    (min_offs + old_size - new_size) <
					EXT4_XATTR_BLOCK_RESERVE(inode)) {
			ret = -ENOSPC;
			goto out;
		}
	}

	/*
	 * Getting access to old and new ea inodes is subject to failures.
	 * Finish that work before doing any modifications to the xattr data.
	 */
	if (!s->not_found && here->e_value_inum) {
		ret = ext4_xattr_inode_iget(inode,
					    le32_to_cpu(here->e_value_inum),
					    le32_to_cpu(here->e_hash),
					    &old_ea_inode);
		if (ret) {
			old_ea_inode = NULL;
			goto out;
		}
	}
	if (i->value && in_inode) {
		WARN_ON_ONCE(!i->value_len);

		ret = ext4_xattr_inode_alloc_quota(inode, i->value_len);
		if (ret)
			goto out;

		ret = ext4_xattr_inode_lookup_create(handle, inode, i->value,
						     i->value_len,
						     &new_ea_inode);
		if (ret) {
			new_ea_inode = NULL;
			ext4_xattr_inode_free_quota(inode, NULL, i->value_len);
			goto out;
		}
	}

	if (old_ea_inode) {
		/* We are ready to release ref count on the old_ea_inode. */
		ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
		if (ret) {
			/* Release newly required ref count on new_ea_inode. */
			if (new_ea_inode) {
				int err;

				err = ext4_xattr_inode_dec_ref(handle,
							       new_ea_inode);
				if (err)
					ext4_warning_inode(new_ea_inode,
						  "dec ref new_ea_inode err=%d",
						  err);
				ext4_xattr_inode_free_quota(inode, new_ea_inode,
							    i->value_len);
			}
			goto out;
		}

		ext4_xattr_inode_free_quota(inode, old_ea_inode,
					    le32_to_cpu(here->e_value_size));
	}

	/* No failures allowed past this point. */

	if (!s->not_found && here->e_value_size && !here->e_value_inum) {
		/* Remove the old value. */
		void *first_val = s->base + min_offs;
		size_t offs = le16_to_cpu(here->e_value_offs);
		void *val = s->base + offs;

		/*
		 * Values located below the removed one move up by old_size,
		 * closing the hole; the vacated bytes are zeroed.
		 */
		memmove(first_val + old_size, first_val, val - first_val);
		memset(first_val, 0, old_size);
		min_offs += old_size;

		/* Adjust all value offsets. */
		last = s->first;
		while (!IS_LAST_ENTRY(last)) {
			size_t o = le16_to_cpu(last->e_value_offs);

			if (!last->e_value_inum &&
			    last->e_value_size && o < offs)
				last->e_value_offs = cpu_to_le16(o + old_size);
			last = EXT4_XATTR_NEXT(last);
		}
	}

	if (!i->value) {
		/* Remove old name. */
		size_t size = EXT4_XATTR_LEN(name_len);

		last = ENTRY((void *)last - size);
		memmove(here, (void *)here + size,
			(void *)last - (void *)here + sizeof(__u32));
		memset(last, 0, size);
	} else if (s->not_found) {
		/* Insert new name. */
		size_t size = EXT4_XATTR_LEN(name_len);
		size_t rest = (void *)last - (void *)here + sizeof(__u32);

		memmove((void *)here + size, here, rest);
		memset(here, 0, size);
		here->e_name_index = i->name_index;
		here->e_name_len = name_len;
		memcpy(here->e_name, i->name, name_len);
	} else {
		/* This is an update, reset value info. */
		here->e_value_inum = 0;
		here->e_value_offs = 0;
		here->e_value_size = 0;
	}

	if (i->value) {
		/* Insert new value. */
		if (in_inode) {
			here->e_value_inum = cpu_to_le32(new_ea_inode->i_ino);
		} else if (i->value_len) {
			void *val = s->base + min_offs - new_size;

			here->e_value_offs = cpu_to_le16(min_offs - new_size);
			if (i->value == EXT4_ZERO_XATTR_VALUE) {
				memset(val, 0, new_size);
			} else {
				memcpy(val, i->value, i->value_len);
				/* Clear padding bytes. */
				memset(val + i->value_len, 0,
				       new_size - i->value_len);
			}
		}
		here->e_value_size = cpu_to_le32(i->value_len);
	}

update_hash:
	if (i->value) {
		__le32 hash = 0;

		/* Entry hash calculation. */
		if (in_inode) {
			__le32 crc32c_hash;

			/*
			 * Feed crc32c hash instead of the raw value for entry
			 * hash calculation. This is to avoid walking
			 * potentially long value buffer again.
			 */
			crc32c_hash = cpu_to_le32(
				       ext4_xattr_inode_get_hash(new_ea_inode));
			hash = ext4_xattr_hash_entry(here->e_name,
						     here->e_name_len,
						     &crc32c_hash, 1);
		} else if (is_block) {
			__le32 *value = s->base + le16_to_cpu(
							here->e_value_offs);

			hash = ext4_xattr_hash_entry(here->e_name,
						     here->e_name_len, value,
						     new_size >> 2);
		}
		/* Inline values stored in the inode body keep a zero hash. */
		here->e_hash = hash;
	}

	if (is_block)
		ext4_xattr_rehash((struct ext4_xattr_header *)s->base);

	ret = 0;
out:
	/* iput() is called unconditionally; both pointers may be NULL. */
	iput(old_ea_inode);
	iput(new_ea_inode);
	return ret;
}

/* Search state for an attribute stored in the external xattr block. */
struct ext4_xattr_block_find {
	struct ext4_xattr_search s;	/* entry position within the block */
	struct buffer_head *bh;		/* buffer of the xattr block, or NULL */
};

/*
 * Read the inode's xattr block, if it has one, and locate attribute @i in
 * it.  On return bs->bh holds the buffer (NULL if the read failed or there
 * is no block) and bs->s.not_found is 0 or -ENODATA.  Returns 0 or a
 * negative error code.
 */
static int
ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
		      struct ext4_xattr_block_find *bs)
{
	struct super_block *sb = inode->i_sb;
	int error;

	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
		  i->name_index, i->name, i->value, (long)i->value_len);

	if (EXT4_I(inode)->i_file_acl) {
		/* The inode already has an extended attribute block. */
		bs->bh = ext4_sb_bread(sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
		if (IS_ERR(bs->bh)) {
			error = PTR_ERR(bs->bh);
			bs->bh = NULL;
			return error;
		}
		ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
			atomic_read(&(bs->bh->b_count)),
			le32_to_cpu(BHDR(bs->bh)->h_refcount));
		/* On failure bs->bh stays set; it is not released here. */
		error = ext4_xattr_check_block(inode, bs->bh);
		if (error)
			return error;
		/* Find the named attribute. */
		bs->s.base = BHDR(bs->bh);
		bs->s.first = BFIRST(bs->bh);
		bs->s.end = bs->bh->b_data + bs->bh->b_size;
		bs->s.here = bs->s.first;
		error = xattr_find_entry(inode, &bs->s.here, bs->s.end,
					 i->name_index, i->name, 1);
		if (error && error != -ENODATA)
			return error;
		bs->s.not_found = error;
	}
	return 0;
}

/*
 * Apply the change described by @i to the inode's external xattr block.
 *
 * Depending on the state found in @bs the existing block is modified in
 * place (refcount 1 and not being reused through the cache), cloned into a
 * private copy, or built from scratch.  The result is then either matched
 * against an identical cached block, kept in place, or written to a newly
 * allocated block, and i_file_acl is updated accordingly.
 *
 * Returns 0 on success or a negative error code.
 */
static int
ext4_xattr_block_set(handle_t *handle, struct inode *inode,
		     struct ext4_xattr_info *i,
		     struct ext4_xattr_block_find *bs)
{
	struct super_block *sb = inode->i_sb;
	struct buffer_head *new_bh = NULL;
	/* Work on a copy so that bs->s still describes the original block. */
	struct ext4_xattr_search s_copy = bs->s;
	struct ext4_xattr_search *s = &s_copy;
	struct mb_cache_entry *ce = NULL;
	int error = 0;
	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
	struct inode *ea_inode = NULL, *tmp_inode;
	size_t old_ea_inode_quota = 0;
	unsigned int ea_ino;


#define header(x) ((struct ext4_xattr_header *)(x))

	if (s->base) {
		/* Byte offset of the entry, reused to find it in a clone. */
		int offset = (char *)s->here - bs->bh->b_data;

		BUFFER_TRACE(bs->bh, "get_write_access");
		error = ext4_journal_get_write_access(handle, sb, bs->bh,
						      EXT4_JTR_NONE);
		if (error)
			goto cleanup;
		lock_buffer(bs->bh);

		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
			__u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash);

			/*
			 * This must happen under buffer lock for
			 * ext4_xattr_block_set() to reliably detect modified
			 * block
			 */
			if (ea_block_cache) {
				struct mb_cache_entry *oe;

				oe = mb_cache_entry_delete_or_get(ea_block_cache,
					hash, bs->bh->b_blocknr);
				if (oe) {
					/*
					 * Xattr block is getting reused. Leave
					 * it alone.
					 */
					mb_cache_entry_put(ea_block_cache, oe);
					goto clone_block;
				}
			}
			ea_bdebug(bs->bh, "modifying in-place");
			error = ext4_xattr_set_entry(i, s, handle, inode,
						     true /* is_block */);
			ext4_xattr_block_csum_set(inode, bs->bh);
			unlock_buffer(bs->bh);
			if (error == -EFSCORRUPTED)
				goto bad_block;
			if (!error)
				error = ext4_handle_dirty_metadata(handle,
								   inode,
								   bs->bh);
			if (error)
				goto cleanup;
			goto inserted;
		}
clone_block:
		unlock_buffer(bs->bh);
		ea_bdebug(bs->bh, "cloning");
		s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
		error = -ENOMEM;
		if (s->base == NULL)
			goto cleanup;
		s->first = ENTRY(header(s->base)+1);
		header(s->base)->h_refcount = cpu_to_le32(1);
		s->here = ENTRY(s->base + offset);
		s->end = s->base + bs->bh->b_size;

		/*
		 * If existing entry points to an xattr inode, we need
		 * to prevent ext4_xattr_set_entry() from decrementing
		 * ref count on it because the reference belongs to the
		 * original block. In this case, make the entry look
		 * like it has an empty value.
		 */
		if (!s->not_found && s->here->e_value_inum) {
			ea_ino = le32_to_cpu(s->here->e_value_inum);
			error = ext4_xattr_inode_iget(inode, ea_ino,
				      le32_to_cpu(s->here->e_hash),
				      &tmp_inode);
			if (error)
				goto cleanup;

			if (!ext4_test_inode_state(tmp_inode,
					EXT4_STATE_LUSTRE_EA_INODE)) {
				/*
				 * Defer quota free call for previous
				 * inode until success is guaranteed.
				 */
				old_ea_inode_quota = le32_to_cpu(
						s->here->e_value_size);
			}
			iput(tmp_inode);

			s->here->e_value_inum = 0;
			s->here->e_value_size = 0;
		}
	} else {
		/* Allocate a buffer where we construct the new block. */
		s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
		error = -ENOMEM;
		if (s->base == NULL)
			goto cleanup;
		header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
		header(s->base)->h_blocks = cpu_to_le32(1);
		header(s->base)->h_refcount = cpu_to_le32(1);
		s->first = ENTRY(header(s->base)+1);
		s->here = ENTRY(header(s->base)+1);
		s->end = s->base + sb->s_blocksize;
	}

	error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
	if (error == -EFSCORRUPTED)
		goto bad_block;
	if (error)
		goto cleanup;

	if (i->value && s->here->e_value_inum) {
		/*
		 * A ref count on ea_inode has been taken as part of the call to
		 * ext4_xattr_set_entry() above. We would like to drop this
		 * extra ref but we have to wait until the xattr block is
		 * initialized and has its own ref count on the ea_inode.
		 */
		ea_ino = le32_to_cpu(s->here->e_value_inum);
		error = ext4_xattr_inode_iget(inode, ea_ino,
					      le32_to_cpu(s->here->e_hash),
					      &ea_inode);
		if (error) {
			ea_inode = NULL;
			goto cleanup;
		}
	}

inserted:
	/* An empty entry list means no block is needed: new_bh stays NULL. */
	if (!IS_LAST_ENTRY(s->first)) {
		new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
						     &ce);
		if (new_bh) {
			/* We found an identical block in the cache. */
			if (new_bh == bs->bh)
				ea_bdebug(new_bh, "keeping");
			else {
				u32 ref;

				WARN_ON_ONCE(dquot_initialize_needed(inode));

				/* The old block is released after updating
				   the inode. */
				error = dquot_alloc_block(inode,
						EXT4_C2B(EXT4_SB(sb), 1));
				if (error)
					goto cleanup;
				BUFFER_TRACE(new_bh, "get_write_access");
				error = ext4_journal_get_write_access(
						handle, sb, new_bh,
						EXT4_JTR_NONE);
				if (error)
					goto cleanup_dquot;
				lock_buffer(new_bh);
				/*
				 * We have to be careful about races with
				 * adding references to xattr block. Once we
				 * hold buffer lock xattr block's state is
				 * stable so we can check the additional
				 * reference fits.
				 */
				ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
				if (ref > EXT4_XATTR_REFCOUNT_MAX) {
					/*
					 * Undo everything and check mbcache
					 * again.
					 */
					unlock_buffer(new_bh);
					dquot_free_block(inode,
							 EXT4_C2B(EXT4_SB(sb),
								  1));
					brelse(new_bh);
					mb_cache_entry_put(ea_block_cache, ce);
					ce = NULL;
					new_bh = NULL;
					goto inserted;
				}
				BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
				/* Block is now full: stop offering it. */
				if (ref == EXT4_XATTR_REFCOUNT_MAX)
					clear_bit(MBE_REUSABLE_B, &ce->e_flags);
				ea_bdebug(new_bh, "reusing; refcount now=%d",
					  ref);
				ext4_xattr_block_csum_set(inode, new_bh);
				unlock_buffer(new_bh);
				error = ext4_handle_dirty_metadata(handle,
								   inode,
								   new_bh);
				if (error)
					goto cleanup_dquot;
			}
			mb_cache_entry_touch(ea_block_cache, ce);
			mb_cache_entry_put(ea_block_cache, ce);
			ce = NULL;
		} else if (bs->bh && s->base == bs->bh->b_data) {
			/* We were modifying this block in-place. */
			ea_bdebug(bs->bh, "keeping this block");
			ext4_xattr_block_cache_insert(ea_block_cache, bs->bh);
			new_bh = bs->bh;
			get_bh(new_bh);
		} else {
			/* We need to allocate a new block */
			ext4_fsblk_t goal, block;

			WARN_ON_ONCE(dquot_initialize_needed(inode));

			goal = ext4_group_first_block_no(sb,
						EXT4_I(inode)->i_block_group);
			block = ext4_new_meta_blocks(handle, inode, goal, 0,
						     NULL, &error);
			if (error)
				goto cleanup;

			ea_idebug(inode, "creating block %llu",
				  (unsigned long long)block);

			new_bh = sb_getblk(sb, block);
			if (unlikely(!new_bh)) {
				error = -ENOMEM;
				/* Also reached by goto from the steps below. */
getblk_failed:
				ext4_free_blocks(handle, inode, NULL, block, 1,
						 EXT4_FREE_BLOCKS_METADATA);
				goto cleanup;
			}
			/* The new block takes its own refs on all EA inodes. */
			error = ext4_xattr_inode_inc_ref_all(handle, inode,
						      ENTRY(header(s->base)+1));
			if (error)
				goto getblk_failed;
			if (ea_inode) {
				/* Drop the extra ref on ea_inode. */
				error = ext4_xattr_inode_dec_ref(handle,
								 ea_inode);
				if (error)
					ext4_warning_inode(ea_inode,
							   "dec ref error=%d",
							   error);
				iput(ea_inode);
				ea_inode = NULL;
			}

			lock_buffer(new_bh);
			error = ext4_journal_get_create_access(handle, sb,
							new_bh, EXT4_JTR_NONE);
			if (error) {
				unlock_buffer(new_bh);
				error = -EIO;
				goto getblk_failed;
			}
			memcpy(new_bh->b_data, s->base, new_bh->b_size);
			ext4_xattr_block_csum_set(inode, new_bh);
			set_buffer_uptodate(new_bh);
			unlock_buffer(new_bh);
			ext4_xattr_block_cache_insert(ea_block_cache, new_bh);
			error = ext4_handle_dirty_metadata(handle, inode,
							   new_bh);
			if (error)
				goto cleanup;
		}
	}

	/* Success is now certain: release the quota deferred while cloning. */
	if (old_ea_inode_quota)
		ext4_xattr_inode_free_quota(inode, NULL, old_ea_inode_quota);

	/* Update the inode. */
	EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;

	/* Drop the previous xattr block. */
	if (bs->bh && bs->bh != new_bh) {
		struct ext4_xattr_inode_array *ea_inode_array = NULL;

		ext4_xattr_release_block(handle, inode, bs->bh,
					 &ea_inode_array,
					 0 /* extra_credits */);
		ext4_xattr_inode_array_free(ea_inode_array);
	}
	error = 0;

cleanup:
	if (ea_inode) {
		int error2;

		error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
		if (error2)
			ext4_warning_inode(ea_inode, "dec ref error=%d",
					   error2);

		/* If there was an error, revert the quota charge. */
		if (error)
			ext4_xattr_inode_free_quota(inode, ea_inode,
						    i_size_read(ea_inode));
		iput(ea_inode);
	}
	if (ce)
		mb_cache_entry_put(ea_block_cache, ce);
	brelse(new_bh);
	/* s->base is a private allocation unless we edited bs->bh in place. */
	if (!(bs->bh && s->base == bs->bh->b_data))
		kfree(s->base);

	return error;

cleanup_dquot:
	dquot_free_block(inode, EXT4_C2B(EXT4_SB(sb), 1));
	goto cleanup;

bad_block:
	EXT4_ERROR_INODE(inode, "bad block %llu",
			 EXT4_I(inode)->i_file_acl);
	goto cleanup;

#undef header
}

/*
 * Set up @is to describe the in-inode xattr area of @inode and, if the inode
 * has in-inode attributes (EXT4_STATE_XATTR), locate attribute @i in it.
 * Returns 0 (also when the inode has no room for in-inode xattrs) or a
 * negative error code.
 */
int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
			  struct ext4_xattr_ibody_find *is)
{
	struct ext4_xattr_ibody_header *header;
	struct ext4_inode *raw_inode;
	int error;

	if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
		return 0;

	raw_inode = ext4_raw_inode(&is->iloc);
	header = IHDR(inode, raw_inode);
	is->s.base = is->s.first = IFIRST(header);
	is->s.here = is->s.first;
	is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
		error = xattr_check_inode(inode, header, is->s.end);
		if (error)
			return error;
		/* Find the named attribute.
*/ 2269 error = xattr_find_entry(inode, &is->s.here, is->s.end, 2270 i->name_index, i->name, 0); 2271 if (error && error != -ENODATA) 2272 return error; 2273 is->s.not_found = error; 2274 } 2275 return 0; 2276 } 2277 2278 int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, 2279 struct ext4_xattr_info *i, 2280 struct ext4_xattr_ibody_find *is) 2281 { 2282 struct ext4_xattr_ibody_header *header; 2283 struct ext4_xattr_search *s = &is->s; 2284 int error; 2285 2286 if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) 2287 return -ENOSPC; 2288 2289 error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); 2290 if (error) 2291 return error; 2292 header = IHDR(inode, ext4_raw_inode(&is->iloc)); 2293 if (!IS_LAST_ENTRY(s->first)) { 2294 header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); 2295 ext4_set_inode_state(inode, EXT4_STATE_XATTR); 2296 } else { 2297 header->h_magic = cpu_to_le32(0); 2298 ext4_clear_inode_state(inode, EXT4_STATE_XATTR); 2299 } 2300 return 0; 2301 } 2302 2303 static int ext4_xattr_value_same(struct ext4_xattr_search *s, 2304 struct ext4_xattr_info *i) 2305 { 2306 void *value; 2307 2308 /* When e_value_inum is set the value is stored externally. 
*/ 2309 if (s->here->e_value_inum) 2310 return 0; 2311 if (le32_to_cpu(s->here->e_value_size) != i->value_len) 2312 return 0; 2313 value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs); 2314 return !memcmp(value, i->value, i->value_len); 2315 } 2316 2317 static struct buffer_head *ext4_xattr_get_block(struct inode *inode) 2318 { 2319 struct buffer_head *bh; 2320 int error; 2321 2322 if (!EXT4_I(inode)->i_file_acl) 2323 return NULL; 2324 bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); 2325 if (IS_ERR(bh)) 2326 return bh; 2327 error = ext4_xattr_check_block(inode, bh); 2328 if (error) { 2329 brelse(bh); 2330 return ERR_PTR(error); 2331 } 2332 return bh; 2333 } 2334 2335 /* 2336 * ext4_xattr_set_handle() 2337 * 2338 * Create, replace or remove an extended attribute for this inode. Value 2339 * is NULL to remove an existing extended attribute, and non-NULL to 2340 * either replace an existing extended attribute, or create a new extended 2341 * attribute. The flags XATTR_REPLACE and XATTR_CREATE 2342 * specify that an extended attribute must exist and must not exist 2343 * previous to the call, respectively. 2344 * 2345 * Returns 0, or a negative error number on failure. 2346 */ 2347 int 2348 ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, 2349 const char *name, const void *value, size_t value_len, 2350 int flags) 2351 { 2352 struct ext4_xattr_info i = { 2353 .name_index = name_index, 2354 .name = name, 2355 .value = value, 2356 .value_len = value_len, 2357 .in_inode = 0, 2358 }; 2359 struct ext4_xattr_ibody_find is = { 2360 .s = { .not_found = -ENODATA, }, 2361 }; 2362 struct ext4_xattr_block_find bs = { 2363 .s = { .not_found = -ENODATA, }, 2364 }; 2365 int no_expand; 2366 int error; 2367 2368 if (!name) 2369 return -EINVAL; 2370 if (strlen(name) > 255) 2371 return -ERANGE; 2372 2373 ext4_write_lock_xattr(inode, &no_expand); 2374 2375 /* Check journal credits under write lock. 
*/ 2376 if (ext4_handle_valid(handle)) { 2377 struct buffer_head *bh; 2378 int credits; 2379 2380 bh = ext4_xattr_get_block(inode); 2381 if (IS_ERR(bh)) { 2382 error = PTR_ERR(bh); 2383 goto cleanup; 2384 } 2385 2386 credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh, 2387 value_len, 2388 flags & XATTR_CREATE); 2389 brelse(bh); 2390 2391 if (jbd2_handle_buffer_credits(handle) < credits) { 2392 error = -ENOSPC; 2393 goto cleanup; 2394 } 2395 WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS)); 2396 } 2397 2398 error = ext4_reserve_inode_write(handle, inode, &is.iloc); 2399 if (error) 2400 goto cleanup; 2401 2402 if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) { 2403 struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); 2404 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 2405 ext4_clear_inode_state(inode, EXT4_STATE_NEW); 2406 } 2407 2408 error = ext4_xattr_ibody_find(inode, &i, &is); 2409 if (error) 2410 goto cleanup; 2411 if (is.s.not_found) 2412 error = ext4_xattr_block_find(inode, &i, &bs); 2413 if (error) 2414 goto cleanup; 2415 if (is.s.not_found && bs.s.not_found) { 2416 error = -ENODATA; 2417 if (flags & XATTR_REPLACE) 2418 goto cleanup; 2419 error = 0; 2420 if (!value) 2421 goto cleanup; 2422 } else { 2423 error = -EEXIST; 2424 if (flags & XATTR_CREATE) 2425 goto cleanup; 2426 } 2427 2428 if (!value) { 2429 if (!is.s.not_found) 2430 error = ext4_xattr_ibody_set(handle, inode, &i, &is); 2431 else if (!bs.s.not_found) 2432 error = ext4_xattr_block_set(handle, inode, &i, &bs); 2433 } else { 2434 error = 0; 2435 /* Xattr value did not change? 
Save us some work and bail out */ 2436 if (!is.s.not_found && ext4_xattr_value_same(&is.s, &i)) 2437 goto cleanup; 2438 if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i)) 2439 goto cleanup; 2440 2441 if (ext4_has_feature_ea_inode(inode->i_sb) && 2442 (EXT4_XATTR_SIZE(i.value_len) > 2443 EXT4_XATTR_MIN_LARGE_EA_SIZE(inode->i_sb->s_blocksize))) 2444 i.in_inode = 1; 2445 retry_inode: 2446 error = ext4_xattr_ibody_set(handle, inode, &i, &is); 2447 if (!error && !bs.s.not_found) { 2448 i.value = NULL; 2449 error = ext4_xattr_block_set(handle, inode, &i, &bs); 2450 } else if (error == -ENOSPC) { 2451 if (EXT4_I(inode)->i_file_acl && !bs.s.base) { 2452 brelse(bs.bh); 2453 bs.bh = NULL; 2454 error = ext4_xattr_block_find(inode, &i, &bs); 2455 if (error) 2456 goto cleanup; 2457 } 2458 error = ext4_xattr_block_set(handle, inode, &i, &bs); 2459 if (!error && !is.s.not_found) { 2460 i.value = NULL; 2461 error = ext4_xattr_ibody_set(handle, inode, &i, 2462 &is); 2463 } else if (error == -ENOSPC) { 2464 /* 2465 * Xattr does not fit in the block, store at 2466 * external inode if possible. 2467 */ 2468 if (ext4_has_feature_ea_inode(inode->i_sb) && 2469 i.value_len && !i.in_inode) { 2470 i.in_inode = 1; 2471 goto retry_inode; 2472 } 2473 } 2474 } 2475 } 2476 if (!error) { 2477 ext4_xattr_update_super_block(handle, inode->i_sb); 2478 inode->i_ctime = current_time(inode); 2479 inode_inc_iversion(inode); 2480 if (!value) 2481 no_expand = 0; 2482 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); 2483 /* 2484 * The bh is consumed by ext4_mark_iloc_dirty, even with 2485 * error != 0. 
2486 */ 2487 is.iloc.bh = NULL; 2488 if (IS_SYNC(inode)) 2489 ext4_handle_sync(handle); 2490 } 2491 ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle); 2492 2493 cleanup: 2494 brelse(is.iloc.bh); 2495 brelse(bs.bh); 2496 ext4_write_unlock_xattr(inode, &no_expand); 2497 return error; 2498 } 2499 2500 int ext4_xattr_set_credits(struct inode *inode, size_t value_len, 2501 bool is_create, int *credits) 2502 { 2503 struct buffer_head *bh; 2504 int err; 2505 2506 *credits = 0; 2507 2508 if (!EXT4_SB(inode->i_sb)->s_journal) 2509 return 0; 2510 2511 down_read(&EXT4_I(inode)->xattr_sem); 2512 2513 bh = ext4_xattr_get_block(inode); 2514 if (IS_ERR(bh)) { 2515 err = PTR_ERR(bh); 2516 } else { 2517 *credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh, 2518 value_len, is_create); 2519 brelse(bh); 2520 err = 0; 2521 } 2522 2523 up_read(&EXT4_I(inode)->xattr_sem); 2524 return err; 2525 } 2526 2527 /* 2528 * ext4_xattr_set() 2529 * 2530 * Like ext4_xattr_set_handle, but start from an inode. This extended 2531 * attribute modification is a filesystem transaction by itself. 2532 * 2533 * Returns 0, or a negative error number on failure. 
2534 */ 2535 int 2536 ext4_xattr_set(struct inode *inode, int name_index, const char *name, 2537 const void *value, size_t value_len, int flags) 2538 { 2539 handle_t *handle; 2540 struct super_block *sb = inode->i_sb; 2541 int error, retries = 0; 2542 int credits; 2543 2544 error = dquot_initialize(inode); 2545 if (error) 2546 return error; 2547 2548 retry: 2549 error = ext4_xattr_set_credits(inode, value_len, flags & XATTR_CREATE, 2550 &credits); 2551 if (error) 2552 return error; 2553 2554 handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); 2555 if (IS_ERR(handle)) { 2556 error = PTR_ERR(handle); 2557 } else { 2558 int error2; 2559 2560 error = ext4_xattr_set_handle(handle, inode, name_index, name, 2561 value, value_len, flags); 2562 error2 = ext4_journal_stop(handle); 2563 if (error == -ENOSPC && 2564 ext4_should_retry_alloc(sb, &retries)) 2565 goto retry; 2566 if (error == 0) 2567 error = error2; 2568 } 2569 ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL); 2570 2571 return error; 2572 } 2573 2574 /* 2575 * Shift the EA entries in the inode to create space for the increased 2576 * i_extra_isize. 
2577 */ 2578 static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry, 2579 int value_offs_shift, void *to, 2580 void *from, size_t n) 2581 { 2582 struct ext4_xattr_entry *last = entry; 2583 int new_offs; 2584 2585 /* We always shift xattr headers further thus offsets get lower */ 2586 BUG_ON(value_offs_shift > 0); 2587 2588 /* Adjust the value offsets of the entries */ 2589 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 2590 if (!last->e_value_inum && last->e_value_size) { 2591 new_offs = le16_to_cpu(last->e_value_offs) + 2592 value_offs_shift; 2593 last->e_value_offs = cpu_to_le16(new_offs); 2594 } 2595 } 2596 /* Shift the entries by n bytes */ 2597 memmove(to, from, n); 2598 } 2599 2600 /* 2601 * Move xattr pointed to by 'entry' from inode into external xattr block 2602 */ 2603 static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, 2604 struct ext4_inode *raw_inode, 2605 struct ext4_xattr_entry *entry) 2606 { 2607 struct ext4_xattr_ibody_find *is = NULL; 2608 struct ext4_xattr_block_find *bs = NULL; 2609 char *buffer = NULL, *b_entry_name = NULL; 2610 size_t value_size = le32_to_cpu(entry->e_value_size); 2611 struct ext4_xattr_info i = { 2612 .value = NULL, 2613 .value_len = 0, 2614 .name_index = entry->e_name_index, 2615 .in_inode = !!entry->e_value_inum, 2616 }; 2617 struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode); 2618 int error; 2619 2620 is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); 2621 bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS); 2622 b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS); 2623 if (!is || !bs || !b_entry_name) { 2624 error = -ENOMEM; 2625 goto out; 2626 } 2627 2628 is->s.not_found = -ENODATA; 2629 bs->s.not_found = -ENODATA; 2630 is->iloc.bh = NULL; 2631 bs->bh = NULL; 2632 2633 /* Save the entry name and the entry value */ 2634 if (entry->e_value_inum) { 2635 buffer = kvmalloc(value_size, GFP_NOFS); 2636 if (!buffer) { 2637 error = -ENOMEM; 
2638 goto out; 2639 } 2640 2641 error = ext4_xattr_inode_get(inode, entry, buffer, value_size); 2642 if (error) 2643 goto out; 2644 } else { 2645 size_t value_offs = le16_to_cpu(entry->e_value_offs); 2646 buffer = (void *)IFIRST(header) + value_offs; 2647 } 2648 2649 memcpy(b_entry_name, entry->e_name, entry->e_name_len); 2650 b_entry_name[entry->e_name_len] = '\0'; 2651 i.name = b_entry_name; 2652 2653 error = ext4_get_inode_loc(inode, &is->iloc); 2654 if (error) 2655 goto out; 2656 2657 error = ext4_xattr_ibody_find(inode, &i, is); 2658 if (error) 2659 goto out; 2660 2661 i.value = buffer; 2662 i.value_len = value_size; 2663 error = ext4_xattr_block_find(inode, &i, bs); 2664 if (error) 2665 goto out; 2666 2667 /* Move ea entry from the inode into the block */ 2668 error = ext4_xattr_block_set(handle, inode, &i, bs); 2669 if (error) 2670 goto out; 2671 2672 /* Remove the chosen entry from the inode */ 2673 i.value = NULL; 2674 i.value_len = 0; 2675 error = ext4_xattr_ibody_set(handle, inode, &i, is); 2676 2677 out: 2678 kfree(b_entry_name); 2679 if (entry->e_value_inum && buffer) 2680 kvfree(buffer); 2681 if (is) 2682 brelse(is->iloc.bh); 2683 if (bs) 2684 brelse(bs->bh); 2685 kfree(is); 2686 kfree(bs); 2687 2688 return error; 2689 } 2690 2691 static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode, 2692 struct ext4_inode *raw_inode, 2693 int isize_diff, size_t ifree, 2694 size_t bfree, int *total_ino) 2695 { 2696 struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode); 2697 struct ext4_xattr_entry *small_entry; 2698 struct ext4_xattr_entry *entry; 2699 struct ext4_xattr_entry *last; 2700 unsigned int entry_size; /* EA entry size */ 2701 unsigned int total_size; /* EA entry size + value size */ 2702 unsigned int min_total_size; 2703 int error; 2704 2705 while (isize_diff > ifree) { 2706 entry = NULL; 2707 small_entry = NULL; 2708 min_total_size = ~0U; 2709 last = IFIRST(header); 2710 /* Find the entry best suited to be pushed into EA 
block */ 2711 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 2712 /* never move system.data out of the inode */ 2713 if ((last->e_name_len == 4) && 2714 (last->e_name_index == EXT4_XATTR_INDEX_SYSTEM) && 2715 !memcmp(last->e_name, "data", 4)) 2716 continue; 2717 total_size = EXT4_XATTR_LEN(last->e_name_len); 2718 if (!last->e_value_inum) 2719 total_size += EXT4_XATTR_SIZE( 2720 le32_to_cpu(last->e_value_size)); 2721 if (total_size <= bfree && 2722 total_size < min_total_size) { 2723 if (total_size + ifree < isize_diff) { 2724 small_entry = last; 2725 } else { 2726 entry = last; 2727 min_total_size = total_size; 2728 } 2729 } 2730 } 2731 2732 if (entry == NULL) { 2733 if (small_entry == NULL) 2734 return -ENOSPC; 2735 entry = small_entry; 2736 } 2737 2738 entry_size = EXT4_XATTR_LEN(entry->e_name_len); 2739 total_size = entry_size; 2740 if (!entry->e_value_inum) 2741 total_size += EXT4_XATTR_SIZE( 2742 le32_to_cpu(entry->e_value_size)); 2743 error = ext4_xattr_move_to_block(handle, inode, raw_inode, 2744 entry); 2745 if (error) 2746 return error; 2747 2748 *total_ino -= entry_size; 2749 ifree += total_size; 2750 bfree -= total_size; 2751 } 2752 2753 return 0; 2754 } 2755 2756 /* 2757 * Expand an inode by new_extra_isize bytes when EAs are present. 2758 * Returns 0 on success or negative error number on failure. 
*
 * If the entries cannot be moved out of the way for @new_extra_isize, the
 * function retries once with the superblock's s_min_extra_isize (when that
 * value is non-zero).  On failure a warning is printed, but only if the
 * superblock mount count differs from the one recorded at the previous
 * warning.
 */
int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
			       struct ext4_inode *raw_inode, handle_t *handle)
{
	struct ext4_xattr_ibody_header *header;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	/* Mount count seen when the warning below was last printed. */
	static unsigned int mnt_count;
	size_t min_offs;
	size_t ifree, bfree;
	int total_ino;
	void *base, *end;
	int error = 0, tried_min_extra_isize = 0;
	int s_min_extra_isize = le16_to_cpu(sbi->s_es->s_min_extra_isize);
	int isize_diff;	/* How much do we need to grow i_extra_isize */

retry:
	isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
	/* Already large enough: nothing to do. */
	if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
		return 0;

	header = IHDR(inode, raw_inode);

	/*
	 * Check if enough free space is available in the inode to shift the
	 * entries ahead by the isize_diff bytes we need to grow.
	 */

	base = IFIRST(header);
	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
	min_offs = end - base;
	/*
	 * NOTE(review): total_ino is later used as the byte count moved by
	 * ext4_xattr_shift_entries(); presumably ext4_xattr_free_space() adds
	 * the size of each entry to it -- confirm against that helper.
	 */
	total_ino = sizeof(struct ext4_xattr_ibody_header) + sizeof(u32);

	error = xattr_check_inode(inode, header, end);
	if (error)
		goto cleanup;

	ifree = ext4_xattr_free_space(base, &min_offs, base, &total_ino);
	if (ifree >= isize_diff)
		goto shift;

	/*
	 * Enough free space isn't available in the inode, check if
	 * EA block can hold new_extra_isize bytes.
	 */
	if (EXT4_I(inode)->i_file_acl) {
		struct buffer_head *bh;

		bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
		if (IS_ERR(bh)) {
			error = PTR_ERR(bh);
			goto cleanup;
		}
		error = ext4_xattr_check_block(inode, bh);
		if (error) {
			brelse(bh);
			goto cleanup;
		}
		base = BHDR(bh);
		end = bh->b_data + bh->b_size;
		min_offs = end - base;
		bfree = ext4_xattr_free_space(BFIRST(bh), &min_offs, base,
					      NULL);
		brelse(bh);
		if (bfree + ifree < isize_diff) {
			/* Not enough room even with the block: try the minimum. */
			if (!tried_min_extra_isize && s_min_extra_isize) {
				tried_min_extra_isize++;
				new_extra_isize = s_min_extra_isize;
				goto retry;
			}
			error = -ENOSPC;
			goto cleanup;
		}
	} else {
		/* No xattr block yet: a whole block's worth is assumed free. */
		bfree = inode->i_sb->s_blocksize;
	}

	error = ext4_xattr_make_inode_space(handle, inode, raw_inode,
					    isize_diff, ifree, bfree,
					    &total_ino);
	if (error) {
		if (error == -ENOSPC && !tried_min_extra_isize &&
		    s_min_extra_isize) {
			tried_min_extra_isize++;
			new_extra_isize = s_min_extra_isize;
			goto retry;
		}
		goto cleanup;
	}
shift:
	/* Adjust the offsets and shift the remaining entries ahead */
	ext4_xattr_shift_entries(IFIRST(header), EXT4_I(inode)->i_extra_isize
			- new_extra_isize, (void *)raw_inode +
			EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
			(void *)header, total_ino);
	EXT4_I(inode)->i_extra_isize = new_extra_isize;

	if (ext4_has_inline_data(inode))
		error = ext4_find_inline_data_nolock(inode);

cleanup:
	/* Warn only when the mount count differs from the last warning's. */
	if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) {
		ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.",
			     inode->i_ino);
		mnt_count = le16_to_cpu(sbi->s_es->s_mnt_count);
	}
	return error;
}

#define EIA_INCR 16 /* must be 2^n */
#define EIA_MASK (EIA_INCR - 1)

/* Add the large xattr @inode into @ea_inode_array for deferred iput().
 * If @ea_inode_array is new or full it will be grown and the old
 * contents copied over.
 *
 * Returns 0 on success or -ENOMEM if the array cannot be allocated or
 * grown; in the -ENOMEM case the existing array is left untouched and
 * @inode is not recorded.
 */
static int
ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
			struct inode *inode)
{
	if (*ea_inode_array == NULL) {
		/*
		 * Start with 15 inodes, so it fits into a power-of-two size.
		 * If *ea_inode_array is NULL, this is essentially offsetof()
		 */
		(*ea_inode_array) =
			kmalloc(offsetof(struct ext4_xattr_inode_array,
					 inodes[EIA_MASK]),
				GFP_NOFS);
		if (*ea_inode_array == NULL)
			return -ENOMEM;
		(*ea_inode_array)->count = 0;
	} else if (((*ea_inode_array)->count & EIA_MASK) == EIA_MASK) {
		/* expand the array once all 15 + n * 16 slots are full */
		struct ext4_xattr_inode_array *new_array = NULL;
		int count = (*ea_inode_array)->count;

		/* if new_array is NULL, this is essentially offsetof() */
		new_array = kmalloc(
				offsetof(struct ext4_xattr_inode_array,
					 inodes[count + EIA_INCR]),
				GFP_NOFS);
		if (new_array == NULL)
			return -ENOMEM;
		/* Copy the header and the count slots already in use. */
		memcpy(new_array, *ea_inode_array,
		       offsetof(struct ext4_xattr_inode_array, inodes[count]));
		kfree(*ea_inode_array);
		*ea_inode_array = new_array;
	}
	(*ea_inode_array)->inodes[(*ea_inode_array)->count++] = inode;
	return 0;
}

/*
 * ext4_xattr_delete_inode()
 *
 * Free extended attribute resources associated with this inode. Traverse
 * all entries and decrement reference on any xattr inodes associated with this
 * inode. This is called immediately before an inode is freed. We have exclusive
 * access to the inode.
If an orphan inode is deleted it will also release its
 * references on xattr block and xattr inodes.
 *
 * @ea_inode_array and @extra_credits are passed through to
 * ext4_xattr_inode_dec_ref_all() and ext4_xattr_release_block().
 *
 * Returns 0 on success or a negative error number.  A failure to look up
 * an individual EA inode while walking the xattr block is skipped rather
 * than reported.
 */
int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
			    struct ext4_xattr_inode_array **ea_inode_array,
			    int extra_credits)
{
	struct buffer_head *bh = NULL;
	struct ext4_xattr_ibody_header *header;
	struct ext4_iloc iloc = { .bh = NULL };
	struct ext4_xattr_entry *entry;
	struct inode *ea_inode;
	int error;

	error = ext4_journal_ensure_credits(handle, extra_credits,
			ext4_free_metadata_revoke_credits(inode->i_sb, 1));
	if (error < 0) {
		EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error);
		goto cleanup;
	}

	/* In-inode xattrs: only relevant when values may live in EA inodes. */
	if (ext4_has_feature_ea_inode(inode->i_sb) &&
	    ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {

		error = ext4_get_inode_loc(inode, &iloc);
		if (error) {
			EXT4_ERROR_INODE(inode, "inode loc (error %d)", error);
			goto cleanup;
		}

		error = ext4_journal_get_write_access(handle, inode->i_sb,
						iloc.bh, EXT4_JTR_NONE);
		if (error) {
			EXT4_ERROR_INODE(inode, "write access (error %d)",
					 error);
			goto cleanup;
		}

		header = IHDR(inode, ext4_raw_inode(&iloc));
		if (header->h_magic == cpu_to_le32(EXT4_XATTR_MAGIC))
			ext4_xattr_inode_dec_ref_all(handle, inode, iloc.bh,
						     IFIRST(header),
						     false /* block_csum */,
						     ea_inode_array,
						     extra_credits,
						     false /* skip_quota */);
	}

	/* External xattr block, if the inode has one. */
	if (EXT4_I(inode)->i_file_acl) {
		bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
		if (IS_ERR(bh)) {
			error = PTR_ERR(bh);
			if (error == -EIO) {
				EXT4_ERROR_INODE_ERR(inode, EIO,
						     "block %llu read error",
						     EXT4_I(inode)->i_file_acl);
			}
			/* Keep the ERR_PTR away from brelse() in cleanup. */
			bh = NULL;
			goto cleanup;
		}
		error = ext4_xattr_check_block(inode, bh);
		if (error)
			goto cleanup;

		if (ext4_has_feature_ea_inode(inode->i_sb)) {
			/* Give back the quota charged for each EA-inode value. */
			for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
			     entry = EXT4_XATTR_NEXT(entry)) {
				if (!entry->e_value_inum)
					continue;
				error = ext4_xattr_inode_iget(inode,
					      le32_to_cpu(entry->e_value_inum),
					      le32_to_cpu(entry->e_hash),
					      &ea_inode);
				/* Best effort: skip EA inodes we cannot get. */
				if (error)
					continue;
				ext4_xattr_inode_free_quota(inode, ea_inode,
					      le32_to_cpu(entry->e_value_size));
				iput(ea_inode);
			}

		}

		ext4_xattr_release_block(handle, inode, bh, ea_inode_array,
					 extra_credits);
		/*
		 * Update i_file_acl value in the same transaction that releases
		 * block.
		 */
		EXT4_I(inode)->i_file_acl = 0;
		error = ext4_mark_inode_dirty(handle, inode);
		if (error) {
			EXT4_ERROR_INODE(inode, "mark inode dirty (error %d)",
					 error);
			goto cleanup;
		}
		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
	}
	error = 0;
cleanup:
	brelse(iloc.bh);
	brelse(bh);
	return error;
}

/*
 * Drop the inode references collected in @ea_inode_array with iput() and
 * free the array itself.  A NULL array is accepted and ignored.
 */
void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array)
{
	int idx;

	if (ea_inode_array == NULL)
		return;

	for (idx = 0; idx < ea_inode_array->count; ++idx)
		iput(ea_inode_array->inodes[idx]);
	kfree(ea_inode_array);
}

/*
 * ext4_xattr_block_cache_insert()
 *
 * Create a new entry in the extended attribute block cache, and insert
 * it unless such an entry is already in the cache.
 *
 * Returns nothing; a failure to create the cache entry is not reported to
 * the caller.
3040 */ 3041 static void 3042 ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache, 3043 struct buffer_head *bh) 3044 { 3045 struct ext4_xattr_header *header = BHDR(bh); 3046 __u32 hash = le32_to_cpu(header->h_hash); 3047 int reusable = le32_to_cpu(header->h_refcount) < 3048 EXT4_XATTR_REFCOUNT_MAX; 3049 int error; 3050 3051 if (!ea_block_cache) 3052 return; 3053 error = mb_cache_entry_create(ea_block_cache, GFP_NOFS, hash, 3054 bh->b_blocknr, reusable); 3055 if (error) { 3056 if (error == -EBUSY) 3057 ea_bdebug(bh, "already in cache"); 3058 } else 3059 ea_bdebug(bh, "inserting [%x]", (int)hash); 3060 } 3061 3062 /* 3063 * ext4_xattr_cmp() 3064 * 3065 * Compare two extended attribute blocks for equality. 3066 * 3067 * Returns 0 if the blocks are equal, 1 if they differ, and 3068 * a negative error number on errors. 3069 */ 3070 static int 3071 ext4_xattr_cmp(struct ext4_xattr_header *header1, 3072 struct ext4_xattr_header *header2) 3073 { 3074 struct ext4_xattr_entry *entry1, *entry2; 3075 3076 entry1 = ENTRY(header1+1); 3077 entry2 = ENTRY(header2+1); 3078 while (!IS_LAST_ENTRY(entry1)) { 3079 if (IS_LAST_ENTRY(entry2)) 3080 return 1; 3081 if (entry1->e_hash != entry2->e_hash || 3082 entry1->e_name_index != entry2->e_name_index || 3083 entry1->e_name_len != entry2->e_name_len || 3084 entry1->e_value_size != entry2->e_value_size || 3085 entry1->e_value_inum != entry2->e_value_inum || 3086 memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) 3087 return 1; 3088 if (!entry1->e_value_inum && 3089 memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), 3090 (char *)header2 + le16_to_cpu(entry2->e_value_offs), 3091 le32_to_cpu(entry1->e_value_size))) 3092 return 1; 3093 3094 entry1 = EXT4_XATTR_NEXT(entry1); 3095 entry2 = EXT4_XATTR_NEXT(entry2); 3096 } 3097 if (!IS_LAST_ENTRY(entry2)) 3098 return 1; 3099 return 0; 3100 } 3101 3102 /* 3103 * ext4_xattr_block_cache_find() 3104 * 3105 * Find an identical extended attribute block. 
3106 * 3107 * Returns a pointer to the block found, or NULL if such a block was 3108 * not found or an error occurred. 3109 */ 3110 static struct buffer_head * 3111 ext4_xattr_block_cache_find(struct inode *inode, 3112 struct ext4_xattr_header *header, 3113 struct mb_cache_entry **pce) 3114 { 3115 __u32 hash = le32_to_cpu(header->h_hash); 3116 struct mb_cache_entry *ce; 3117 struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); 3118 3119 if (!ea_block_cache) 3120 return NULL; 3121 if (!header->h_hash) 3122 return NULL; /* never share */ 3123 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 3124 ce = mb_cache_entry_find_first(ea_block_cache, hash); 3125 while (ce) { 3126 struct buffer_head *bh; 3127 3128 bh = ext4_sb_bread(inode->i_sb, ce->e_value, REQ_PRIO); 3129 if (IS_ERR(bh)) { 3130 if (PTR_ERR(bh) == -ENOMEM) 3131 return NULL; 3132 bh = NULL; 3133 EXT4_ERROR_INODE(inode, "block %lu read error", 3134 (unsigned long)ce->e_value); 3135 } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) { 3136 *pce = ce; 3137 return bh; 3138 } 3139 brelse(bh); 3140 ce = mb_cache_entry_find_next(ea_block_cache, ce); 3141 } 3142 return NULL; 3143 } 3144 3145 #define NAME_HASH_SHIFT 5 3146 #define VALUE_HASH_SHIFT 16 3147 3148 /* 3149 * ext4_xattr_hash_entry() 3150 * 3151 * Compute the hash of an extended attribute. 3152 */ 3153 static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value, 3154 size_t value_count) 3155 { 3156 __u32 hash = 0; 3157 3158 while (name_len--) { 3159 hash = (hash << NAME_HASH_SHIFT) ^ 3160 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ 3161 (unsigned char)*name++; 3162 } 3163 while (value_count--) { 3164 hash = (hash << VALUE_HASH_SHIFT) ^ 3165 (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ 3166 le32_to_cpu(*value++); 3167 } 3168 return cpu_to_le32(hash); 3169 } 3170 3171 /* 3172 * ext4_xattr_hash_entry_signed() 3173 * 3174 * Compute the hash of an extended attribute incorrectly. 
3175 */ 3176 static __le32 ext4_xattr_hash_entry_signed(char *name, size_t name_len, __le32 *value, size_t value_count) 3177 { 3178 __u32 hash = 0; 3179 3180 while (name_len--) { 3181 hash = (hash << NAME_HASH_SHIFT) ^ 3182 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ 3183 (signed char)*name++; 3184 } 3185 while (value_count--) { 3186 hash = (hash << VALUE_HASH_SHIFT) ^ 3187 (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ 3188 le32_to_cpu(*value++); 3189 } 3190 return cpu_to_le32(hash); 3191 } 3192 3193 #undef NAME_HASH_SHIFT 3194 #undef VALUE_HASH_SHIFT 3195 3196 #define BLOCK_HASH_SHIFT 16 3197 3198 /* 3199 * ext4_xattr_rehash() 3200 * 3201 * Re-compute the extended attribute hash value after an entry has changed. 3202 */ 3203 static void ext4_xattr_rehash(struct ext4_xattr_header *header) 3204 { 3205 struct ext4_xattr_entry *here; 3206 __u32 hash = 0; 3207 3208 here = ENTRY(header+1); 3209 while (!IS_LAST_ENTRY(here)) { 3210 if (!here->e_hash) { 3211 /* Block is not shared if an entry's hash value == 0 */ 3212 hash = 0; 3213 break; 3214 } 3215 hash = (hash << BLOCK_HASH_SHIFT) ^ 3216 (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ 3217 le32_to_cpu(here->e_hash); 3218 here = EXT4_XATTR_NEXT(here); 3219 } 3220 header->h_hash = cpu_to_le32(hash); 3221 } 3222 3223 #undef BLOCK_HASH_SHIFT 3224 3225 #define HASH_BUCKET_BITS 10 3226 3227 struct mb_cache * 3228 ext4_xattr_create_cache(void) 3229 { 3230 return mb_cache_create(HASH_BUCKET_BITS); 3231 } 3232 3233 void ext4_xattr_destroy_cache(struct mb_cache *cache) 3234 { 3235 if (cache) 3236 mb_cache_destroy(cache); 3237 } 3238 3239