1 #include <linux/ceph/ceph_debug.h> 2 #include <linux/ceph/pagelist.h> 3 4 #include "super.h" 5 #include "mds_client.h" 6 7 #include <linux/ceph/decode.h> 8 9 #include <linux/xattr.h> 10 #include <linux/posix_acl_xattr.h> 11 #include <linux/slab.h> 12 13 #define XATTR_CEPH_PREFIX "ceph." 14 #define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1) 15 16 static int __remove_xattr(struct ceph_inode_info *ci, 17 struct ceph_inode_xattr *xattr); 18 19 /* 20 * List of handlers for synthetic system.* attributes. Other 21 * attributes are handled directly. 22 */ 23 const struct xattr_handler *ceph_xattr_handlers[] = { 24 #ifdef CONFIG_CEPH_FS_POSIX_ACL 25 &posix_acl_access_xattr_handler, 26 &posix_acl_default_xattr_handler, 27 #endif 28 NULL, 29 }; 30 31 static bool ceph_is_valid_xattr(const char *name) 32 { 33 return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) || 34 !strncmp(name, XATTR_SECURITY_PREFIX, 35 XATTR_SECURITY_PREFIX_LEN) || 36 !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) || 37 !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || 38 !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); 39 } 40 41 /* 42 * These define virtual xattrs exposing the recursive directory 43 * statistics and layout metadata. 44 */ 45 struct ceph_vxattr { 46 char *name; 47 size_t name_size; /* strlen(name) + 1 (for '\0') */ 48 size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, 49 size_t size); 50 bool readonly, hidden; 51 bool (*exists_cb)(struct ceph_inode_info *ci); 52 }; 53 54 /* layouts */ 55 56 static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) 57 { 58 size_t s; 59 char *p = (char *)&ci->i_layout; 60 61 for (s = 0; s < sizeof(ci->i_layout); s++, p++) 62 if (*p) 63 return true; 64 return false; 65 } 66 67 static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, 68 size_t size) 69 { 70 int ret; 71 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); 72 struct ceph_osd_client *osdc = &fsc->client->osdc; 73 s64 pool = ceph_file_layout_pg_pool(ci->i_layout); 74 const char *pool_name; 75 char buf[128]; 76 77 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); 78 down_read(&osdc->map_sem); 79 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); 80 if (pool_name) { 81 size_t len = strlen(pool_name); 82 ret = snprintf(buf, sizeof(buf), 83 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=", 84 (unsigned long long)ceph_file_layout_su(ci->i_layout), 85 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), 86 (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); 87 if (!size) { 88 ret += len; 89 } else if (ret + len > size) { 90 ret = -ERANGE; 91 } else { 92 memcpy(val, buf, ret); 93 memcpy(val + ret, pool_name, len); 94 ret += len; 95 } 96 } else { 97 ret = snprintf(buf, sizeof(buf), 98 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", 99 (unsigned long long)ceph_file_layout_su(ci->i_layout), 100 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), 101 (unsigned long long)ceph_file_layout_object_size(ci->i_layout), 102 (unsigned long long)pool); 103 if (size) { 104 if (ret <= size) 105 memcpy(val, buf, ret); 106 else 107 ret = -ERANGE; 108 } 109 } 110 up_read(&osdc->map_sem); 111 return ret; 112 } 113 114 static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci, 115 char *val, size_t size) 116 { 117 return snprintf(val, size, "%lld", 118 (unsigned long long)ceph_file_layout_su(ci->i_layout)); 119 } 120 121 static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci, 122 char *val, size_t size) 123 { 124 return snprintf(val, size, "%lld", 125 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout)); 126 } 127 128 static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci, 129 char *val, size_t size) 130 { 131 return snprintf(val, size, "%lld", 132 (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); 133 } 134 135 static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, 136 char *val, size_t size) 137 { 138 int ret; 139 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); 140 struct ceph_osd_client *osdc = &fsc->client->osdc; 141 s64 pool = ceph_file_layout_pg_pool(ci->i_layout); 142 const char *pool_name; 143 144 down_read(&osdc->map_sem); 145 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); 146 if (pool_name) 147 ret = snprintf(val, size, "%s", pool_name); 148 else 149 ret = snprintf(val, size, "%lld", (unsigned long long)pool); 150 up_read(&osdc->map_sem); 151 return ret; 152 } 153 154 /* directories */ 155 156 static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, 157 size_t size) 158 { 159 return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs); 160 } 161 162 static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val, 163 size_t size) 164 { 165 return snprintf(val, size, "%lld", ci->i_files); 166 } 167 168 static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val, 169 size_t size) 170 { 171 return snprintf(val, size, "%lld", ci->i_subdirs); 172 } 173 174 static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val, 175 size_t size) 176 { 177 return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs); 178 } 179 180 static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val, 181 size_t size) 182 { 183 return snprintf(val, size, "%lld", ci->i_rfiles); 184 } 185 186 static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val, 187 size_t size) 188 { 189 return snprintf(val, size, "%lld", ci->i_rsubdirs); 190 } 191 192 static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val, 193 size_t size) 194 { 195 return snprintf(val, size, "%lld", ci->i_rbytes); 196 } 197 198 static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, 199 size_t size) 200 { 201 return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec, 202 (long)ci->i_rctime.tv_nsec); 203 } 204 205 206 #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name 207 #define CEPH_XATTR_NAME2(_type, _name, _name2) \ 208 XATTR_CEPH_PREFIX #_type "." #_name "." #_name2 209 210 #define XATTR_NAME_CEPH(_type, _name) \ 211 { \ 212 .name = CEPH_XATTR_NAME(_type, _name), \ 213 .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ 214 .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ 215 .readonly = true, \ 216 .hidden = false, \ 217 .exists_cb = NULL, \ 218 } 219 #define XATTR_LAYOUT_FIELD(_type, _name, _field) \ 220 { \ 221 .name = CEPH_XATTR_NAME2(_type, _name, _field), \ 222 .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \ 223 .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \ 224 .readonly = false, \ 225 .hidden = true, \ 226 .exists_cb = ceph_vxattrcb_layout_exists, \ 227 } 228 229 static struct ceph_vxattr ceph_dir_vxattrs[] = { 230 { 231 .name = "ceph.dir.layout", 232 .name_size = sizeof("ceph.dir.layout"), 233 .getxattr_cb = ceph_vxattrcb_layout, 234 .readonly = false, 235 .hidden = true, 236 .exists_cb = ceph_vxattrcb_layout_exists, 237 }, 238 XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), 239 XATTR_LAYOUT_FIELD(dir, layout, stripe_count), 240 XATTR_LAYOUT_FIELD(dir, layout, object_size), 241 XATTR_LAYOUT_FIELD(dir, layout, pool), 242 XATTR_NAME_CEPH(dir, entries), 243 XATTR_NAME_CEPH(dir, files), 244 XATTR_NAME_CEPH(dir, subdirs), 245 XATTR_NAME_CEPH(dir, rentries), 246 XATTR_NAME_CEPH(dir, rfiles), 247 XATTR_NAME_CEPH(dir, rsubdirs), 248 XATTR_NAME_CEPH(dir, rbytes), 249 XATTR_NAME_CEPH(dir, rctime), 250 { .name = NULL, 0 } /* Required table terminator */ 251 }; 252 static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ 253 254 /* files */ 255 256 static struct ceph_vxattr ceph_file_vxattrs[] = { 257 { 258 .name = "ceph.file.layout", 259 .name_size = sizeof("ceph.file.layout"), 260 .getxattr_cb = ceph_vxattrcb_layout, 261 .readonly = false, 262 .hidden = true, 263 .exists_cb = ceph_vxattrcb_layout_exists, 264 }, 265 XATTR_LAYOUT_FIELD(file, layout, stripe_unit), 266 XATTR_LAYOUT_FIELD(file, layout, stripe_count), 267 XATTR_LAYOUT_FIELD(file, layout, object_size), 268 XATTR_LAYOUT_FIELD(file, layout, pool), 269 { .name = NULL, 0 } /* Required table terminator */ 270 }; 271 static size_t ceph_file_vxattrs_name_size; /* total size of all names */ 272 273 static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode) 274 { 275 if (S_ISDIR(inode->i_mode)) 276 return ceph_dir_vxattrs; 277 else if (S_ISREG(inode->i_mode)) 278 return ceph_file_vxattrs; 279 return NULL; 280 } 281 282 static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs) 283 { 284 if (vxattrs == ceph_dir_vxattrs) 285 return ceph_dir_vxattrs_name_size; 286 if (vxattrs == ceph_file_vxattrs) 287 return ceph_file_vxattrs_name_size; 288 BUG_ON(vxattrs); 289 return 0; 290 } 291 292 /* 293 * Compute the aggregate size (including terminating '\0') of all 294 * virtual extended attribute names in the given vxattr table. 295 */ 296 static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs) 297 { 298 struct ceph_vxattr *vxattr; 299 size_t size = 0; 300 301 for (vxattr = vxattrs; vxattr->name; vxattr++) 302 if (!vxattr->hidden) 303 size += vxattr->name_size; 304 305 return size; 306 } 307 308 /* Routines called at initialization and exit time */ 309 310 void __init ceph_xattr_init(void) 311 { 312 ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs); 313 ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs); 314 } 315 316 void ceph_xattr_exit(void) 317 { 318 ceph_dir_vxattrs_name_size = 0; 319 ceph_file_vxattrs_name_size = 0; 320 } 321 322 static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode, 323 const char *name) 324 { 325 struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode); 326 327 if (vxattr) { 328 while (vxattr->name) { 329 if (!strcmp(vxattr->name, name)) 330 return vxattr; 331 vxattr++; 332 } 333 } 334 335 return NULL; 336 } 337 338 static int __set_xattr(struct ceph_inode_info *ci, 339 const char *name, int name_len, 340 const char *val, int val_len, 341 int flags, int update_xattr, 342 struct ceph_inode_xattr **newxattr) 343 { 344 struct rb_node **p; 345 struct rb_node *parent = NULL; 346 struct ceph_inode_xattr *xattr = NULL; 347 int c; 348 int new = 0; 349 350 p = &ci->i_xattrs.index.rb_node; 351 while (*p) { 352 parent = *p; 353 xattr = rb_entry(parent, struct ceph_inode_xattr, node); 354 c = strncmp(name, xattr->name, min(name_len, xattr->name_len)); 355 if (c < 0) 356 p = &(*p)->rb_left; 357 else if (c > 0) 358 p = &(*p)->rb_right; 359 else { 360 if (name_len == xattr->name_len) 361 break; 362 else if (name_len < xattr->name_len) 363 p = &(*p)->rb_left; 364 else 365 p = &(*p)->rb_right; 366 } 367 xattr = NULL; 368 } 369 370 if (update_xattr) { 371 int err = 0; 372 if (xattr && (flags & XATTR_CREATE)) 373 err = -EEXIST; 374 else if (!xattr && (flags & XATTR_REPLACE)) 375 err = -ENODATA; 376 if (err) { 377 kfree(name); 378 kfree(val); 379 return err; 380 } 381 if (update_xattr < 0) { 382 if (xattr) 383 __remove_xattr(ci, xattr); 384 kfree(name); 385 return 0; 386 } 387 } 388 389 if (!xattr) { 390 new = 1; 391 xattr = *newxattr; 392 xattr->name = name; 393 xattr->name_len = name_len; 394 xattr->should_free_name = update_xattr; 395 396 ci->i_xattrs.count++; 397 dout("__set_xattr count=%d\n", ci->i_xattrs.count); 398 } else { 399 kfree(*newxattr); 400 *newxattr = NULL; 401 if (xattr->should_free_val) 402 kfree((void *)xattr->val); 403 404 if (update_xattr) { 405 kfree((void *)name); 406 name = xattr->name; 407 } 408 ci->i_xattrs.names_size -= xattr->name_len; 409 ci->i_xattrs.vals_size -= xattr->val_len; 410 } 411 ci->i_xattrs.names_size += name_len; 412 ci->i_xattrs.vals_size += val_len; 413 if (val) 414 xattr->val = val; 415 else 416 xattr->val = ""; 417 418 xattr->val_len = val_len; 419 xattr->dirty = update_xattr; 420 xattr->should_free_val = (val && update_xattr); 421 422 if (new) { 423 rb_link_node(&xattr->node, parent, p); 424 rb_insert_color(&xattr->node, &ci->i_xattrs.index); 425 dout("__set_xattr_val p=%p\n", p); 426 } 427 428 dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n", 429 ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val); 430 431 return 0; 432 } 433 434 static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, 435 const char *name) 436 { 437 struct rb_node **p; 438 struct rb_node *parent = NULL; 439 struct ceph_inode_xattr *xattr = NULL; 440 int name_len = strlen(name); 441 int c; 442 443 p = &ci->i_xattrs.index.rb_node; 444 while (*p) { 445 parent = *p; 446 xattr = rb_entry(parent, struct ceph_inode_xattr, node); 447 c = strncmp(name, xattr->name, xattr->name_len); 448 if (c == 0 && name_len > xattr->name_len) 449 c = 1; 450 if (c < 0) 451 p = &(*p)->rb_left; 452 else if (c > 0) 453 p = &(*p)->rb_right; 454 else { 455 dout("__get_xattr %s: found %.*s\n", name, 456 xattr->val_len, xattr->val); 457 return xattr; 458 } 459 } 460 461 dout("__get_xattr %s: not found\n", name); 462 463 return NULL; 464 } 465 466 static void __free_xattr(struct ceph_inode_xattr *xattr) 467 { 468 BUG_ON(!xattr); 469 470 if (xattr->should_free_name) 471 kfree((void *)xattr->name); 472 if (xattr->should_free_val) 473 kfree((void *)xattr->val); 474 475 kfree(xattr); 476 } 477 478 static int __remove_xattr(struct ceph_inode_info *ci, 479 struct ceph_inode_xattr *xattr) 480 { 481 if (!xattr) 482 return -ENODATA; 483 484 rb_erase(&xattr->node, &ci->i_xattrs.index); 485 486 if (xattr->should_free_name) 487 kfree((void *)xattr->name); 488 if (xattr->should_free_val) 489 kfree((void *)xattr->val); 490 491 ci->i_xattrs.names_size -= xattr->name_len; 492 ci->i_xattrs.vals_size -= xattr->val_len; 493 ci->i_xattrs.count--; 494 kfree(xattr); 495 496 return 0; 497 } 498 499 static int __remove_xattr_by_name(struct ceph_inode_info *ci, 500 const char *name) 501 { 502 struct rb_node **p; 503 struct ceph_inode_xattr *xattr; 504 int err; 505 506 p = &ci->i_xattrs.index.rb_node; 507 xattr = __get_xattr(ci, name); 508 err = __remove_xattr(ci, xattr); 509 return err; 510 } 511 512 static char *__copy_xattr_names(struct ceph_inode_info *ci, 513 char *dest) 514 { 515 struct rb_node *p; 516 struct ceph_inode_xattr *xattr = NULL; 517 518 p = rb_first(&ci->i_xattrs.index); 519 dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count); 520 521 while (p) { 522 xattr = rb_entry(p, struct ceph_inode_xattr, node); 523 memcpy(dest, xattr->name, xattr->name_len); 524 dest[xattr->name_len] = '\0'; 525 526 dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name, 527 xattr->name_len, ci->i_xattrs.names_size); 528 529 dest += xattr->name_len + 1; 530 p = rb_next(p); 531 } 532 533 return dest; 534 } 535 536 void __ceph_destroy_xattrs(struct ceph_inode_info *ci) 537 { 538 struct rb_node *p, *tmp; 539 struct ceph_inode_xattr *xattr = NULL; 540 541 p = rb_first(&ci->i_xattrs.index); 542 543 dout("__ceph_destroy_xattrs p=%p\n", p); 544 545 while (p) { 546 xattr = rb_entry(p, struct ceph_inode_xattr, node); 547 tmp = p; 548 p = rb_next(tmp); 549 dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p, 550 xattr->name_len, xattr->name); 551 rb_erase(tmp, &ci->i_xattrs.index); 552 553 __free_xattr(xattr); 554 } 555 556 ci->i_xattrs.names_size = 0; 557 ci->i_xattrs.vals_size = 0; 558 ci->i_xattrs.index_version = 0; 559 ci->i_xattrs.count = 0; 560 ci->i_xattrs.index = RB_ROOT; 561 } 562 563 static int __build_xattrs(struct inode *inode) 564 __releases(ci->i_ceph_lock) 565 __acquires(ci->i_ceph_lock) 566 { 567 u32 namelen; 568 u32 numattr = 0; 569 void *p, *end; 570 u32 len; 571 const char *name, *val; 572 struct ceph_inode_info *ci = ceph_inode(inode); 573 int xattr_version; 574 struct ceph_inode_xattr **xattrs = NULL; 575 int err = 0; 576 int i; 577 578 dout("__build_xattrs() len=%d\n", 579 ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0); 580 581 if (ci->i_xattrs.index_version >= ci->i_xattrs.version) 582 return 0; /* already built */ 583 584 __ceph_destroy_xattrs(ci); 585 586 start: 587 /* updated internal xattr rb tree */ 588 if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) { 589 p = ci->i_xattrs.blob->vec.iov_base; 590 end = p + ci->i_xattrs.blob->vec.iov_len; 591 ceph_decode_32_safe(&p, end, numattr, bad); 592 xattr_version = ci->i_xattrs.version; 593 spin_unlock(&ci->i_ceph_lock); 594 595 xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *), 596 GFP_NOFS); 597 err = -ENOMEM; 598 if (!xattrs) 599 goto bad_lock; 600 601 for (i = 0; i < numattr; i++) { 602 xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr), 603 GFP_NOFS); 604 if (!xattrs[i]) 605 goto bad_lock; 606 } 607 608 spin_lock(&ci->i_ceph_lock); 609 if (ci->i_xattrs.version != xattr_version) { 610 /* lost a race, retry */ 611 for (i = 0; i < numattr; i++) 612 kfree(xattrs[i]); 613 kfree(xattrs); 614 xattrs = NULL; 615 goto start; 616 } 617 err = -EIO; 618 while (numattr--) { 619 ceph_decode_32_safe(&p, end, len, bad); 620 namelen = len; 621 name = p; 622 p += len; 623 ceph_decode_32_safe(&p, end, len, bad); 624 val = p; 625 p += len; 626 627 err = __set_xattr(ci, name, namelen, val, len, 628 0, 0, &xattrs[numattr]); 629 630 if (err < 0) 631 goto bad; 632 } 633 kfree(xattrs); 634 } 635 ci->i_xattrs.index_version = ci->i_xattrs.version; 636 ci->i_xattrs.dirty = false; 637 638 return err; 639 bad_lock: 640 spin_lock(&ci->i_ceph_lock); 641 bad: 642 if (xattrs) { 643 for (i = 0; i < numattr; i++) 644 kfree(xattrs[i]); 645 kfree(xattrs); 646 } 647 ci->i_xattrs.names_size = 0; 648 return err; 649 } 650 651 static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, 652 int val_size) 653 { 654 /* 655 * 4 bytes for the length, and additional 4 bytes per each xattr name, 656 * 4 bytes per each value 657 */ 658 int size = 4 + ci->i_xattrs.count*(4 + 4) + 659 ci->i_xattrs.names_size + 660 ci->i_xattrs.vals_size; 661 dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n", 662 ci->i_xattrs.count, ci->i_xattrs.names_size, 663 ci->i_xattrs.vals_size); 664 665 if (name_size) 666 size += 4 + 4 + name_size + val_size; 667 668 return size; 669 } 670 671 /* 672 * If there are dirty xattrs, reencode xattrs into the prealloc_blob 673 * and swap into place. 674 */ 675 void __ceph_build_xattrs_blob(struct ceph_inode_info *ci) 676 { 677 struct rb_node *p; 678 struct ceph_inode_xattr *xattr = NULL; 679 void *dest; 680 681 dout("__build_xattrs_blob %p\n", &ci->vfs_inode); 682 if (ci->i_xattrs.dirty) { 683 int need = __get_required_blob_size(ci, 0, 0); 684 685 BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len); 686 687 p = rb_first(&ci->i_xattrs.index); 688 dest = ci->i_xattrs.prealloc_blob->vec.iov_base; 689 690 ceph_encode_32(&dest, ci->i_xattrs.count); 691 while (p) { 692 xattr = rb_entry(p, struct ceph_inode_xattr, node); 693 694 ceph_encode_32(&dest, xattr->name_len); 695 memcpy(dest, xattr->name, xattr->name_len); 696 dest += xattr->name_len; 697 ceph_encode_32(&dest, xattr->val_len); 698 memcpy(dest, xattr->val, xattr->val_len); 699 dest += xattr->val_len; 700 701 p = rb_next(p); 702 } 703 704 /* adjust buffer len; it may be larger than we need */ 705 ci->i_xattrs.prealloc_blob->vec.iov_len = 706 dest - ci->i_xattrs.prealloc_blob->vec.iov_base; 707 708 if (ci->i_xattrs.blob) 709 ceph_buffer_put(ci->i_xattrs.blob); 710 ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; 711 ci->i_xattrs.prealloc_blob = NULL; 712 ci->i_xattrs.dirty = false; 713 ci->i_xattrs.version++; 714 } 715 } 716 717 ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, 718 size_t size) 719 { 720 struct ceph_inode_info *ci = ceph_inode(inode); 721 int err; 722 struct ceph_inode_xattr *xattr; 723 struct ceph_vxattr *vxattr = NULL; 724 725 if (!ceph_is_valid_xattr(name)) 726 return -ENODATA; 727 728 /* let's see if a virtual xattr was requested */ 729 vxattr = ceph_match_vxattr(inode, name); 730 if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) { 731 err = vxattr->getxattr_cb(ci, value, size); 732 return err; 733 } 734 735 spin_lock(&ci->i_ceph_lock); 736 dout("getxattr %p ver=%lld index_ver=%lld\n", inode, 737 ci->i_xattrs.version, ci->i_xattrs.index_version); 738 739 if (ci->i_xattrs.version == 0 || 740 !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) { 741 spin_unlock(&ci->i_ceph_lock); 742 /* get xattrs from mds (if we don't already have them) */ 743 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true); 744 if (err) 745 return err; 746 spin_lock(&ci->i_ceph_lock); 747 } 748 749 err = __build_xattrs(inode); 750 if (err < 0) 751 goto out; 752 753 err = -ENODATA; /* == ENOATTR */ 754 xattr = __get_xattr(ci, name); 755 if (!xattr) 756 goto out; 757 758 err = -ERANGE; 759 if (size && size < xattr->val_len) 760 goto out; 761 762 err = xattr->val_len; 763 if (size == 0) 764 goto out; 765 766 memcpy(value, xattr->val, xattr->val_len); 767 768 out: 769 spin_unlock(&ci->i_ceph_lock); 770 return err; 771 } 772 773 ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, 774 size_t size) 775 { 776 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) 777 return generic_getxattr(dentry, name, value, size); 778 779 return __ceph_getxattr(d_inode(dentry), name, value, size); 780 } 781 782 ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) 783 { 784 struct inode *inode = d_inode(dentry); 785 struct ceph_inode_info *ci = ceph_inode(inode); 786 struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode); 787 u32 vir_namelen = 0; 788 u32 namelen; 789 int err; 790 u32 len; 791 int i; 792 793 spin_lock(&ci->i_ceph_lock); 794 dout("listxattr %p ver=%lld index_ver=%lld\n", inode, 795 ci->i_xattrs.version, ci->i_xattrs.index_version); 796 797 if (ci->i_xattrs.version == 0 || 798 !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) { 799 spin_unlock(&ci->i_ceph_lock); 800 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true); 801 if (err) 802 return err; 803 spin_lock(&ci->i_ceph_lock); 804 } 805 806 err = __build_xattrs(inode); 807 if (err < 0) 808 goto out; 809 /* 810 * Start with virtual dir xattr names (if any) (including 811 * terminating '\0' characters for each). 812 */ 813 vir_namelen = ceph_vxattrs_name_size(vxattrs); 814 815 /* adding 1 byte per each variable due to the null termination */ 816 namelen = ci->i_xattrs.names_size + ci->i_xattrs.count; 817 err = -ERANGE; 818 if (size && vir_namelen + namelen > size) 819 goto out; 820 821 err = namelen + vir_namelen; 822 if (size == 0) 823 goto out; 824 825 names = __copy_xattr_names(ci, names); 826 827 /* virtual xattr names, too */ 828 err = namelen; 829 if (vxattrs) { 830 for (i = 0; vxattrs[i].name; i++) { 831 if (!vxattrs[i].hidden && 832 !(vxattrs[i].exists_cb && 833 !vxattrs[i].exists_cb(ci))) { 834 len = sprintf(names, "%s", vxattrs[i].name); 835 names += len + 1; 836 err += len + 1; 837 } 838 } 839 } 840 841 out: 842 spin_unlock(&ci->i_ceph_lock); 843 return err; 844 } 845 846 static int ceph_sync_setxattr(struct dentry *dentry, const char *name, 847 const char *value, size_t size, int flags) 848 { 849 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 850 struct inode *inode = d_inode(dentry); 851 struct ceph_inode_info *ci = ceph_inode(inode); 852 struct ceph_mds_request *req; 853 struct ceph_mds_client *mdsc = fsc->mdsc; 854 struct ceph_pagelist *pagelist = NULL; 855 int err; 856 857 if (size > 0) { 858 /* copy value into pagelist */ 859 pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); 860 if (!pagelist) 861 return -ENOMEM; 862 863 ceph_pagelist_init(pagelist); 864 err = ceph_pagelist_append(pagelist, value, size); 865 if (err) 866 goto out; 867 } else if (!value) { 868 flags |= CEPH_XATTR_REMOVE; 869 } 870 871 dout("setxattr value=%.*s\n", (int)size, value); 872 873 /* do request */ 874 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR, 875 USE_AUTH_MDS); 876 if (IS_ERR(req)) { 877 err = PTR_ERR(req); 878 goto out; 879 } 880 881 req->r_args.setxattr.flags = cpu_to_le32(flags); 882 req->r_path2 = kstrdup(name, GFP_NOFS); 883 if (!req->r_path2) { 884 ceph_mdsc_put_request(req); 885 err = -ENOMEM; 886 goto out; 887 } 888 889 req->r_pagelist = pagelist; 890 pagelist = NULL; 891 892 req->r_inode = inode; 893 ihold(inode); 894 req->r_num_caps = 1; 895 req->r_inode_drop = CEPH_CAP_XATTR_SHARED; 896 897 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); 898 err = ceph_mdsc_do_request(mdsc, NULL, req); 899 ceph_mdsc_put_request(req); 900 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); 901 902 out: 903 if (pagelist) 904 ceph_pagelist_release(pagelist); 905 return err; 906 } 907 908 int __ceph_setxattr(struct dentry *dentry, const char *name, 909 const void *value, size_t size, int flags) 910 { 911 struct inode *inode = d_inode(dentry); 912 struct ceph_vxattr *vxattr; 913 struct ceph_inode_info *ci = ceph_inode(inode); 914 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; 915 struct ceph_cap_flush *prealloc_cf = NULL; 916 int issued; 917 int err; 918 int dirty = 0; 919 int name_len = strlen(name); 920 int val_len = size; 921 char *newname = NULL; 922 char *newval = NULL; 923 struct ceph_inode_xattr *xattr = NULL; 924 int required_blob_size; 925 bool lock_snap_rwsem = false; 926 927 if (!ceph_is_valid_xattr(name)) 928 return -EOPNOTSUPP; 929 930 vxattr = ceph_match_vxattr(inode, name); 931 if (vxattr && vxattr->readonly) 932 return -EOPNOTSUPP; 933 934 /* pass any unhandled ceph.* xattrs through to the MDS */ 935 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) 936 goto do_sync_unlocked; 937 938 /* preallocate memory for xattr name, value, index node */ 939 err = -ENOMEM; 940 newname = kmemdup(name, name_len + 1, GFP_NOFS); 941 if (!newname) 942 goto out; 943 944 if (val_len) { 945 newval = kmemdup(value, val_len, GFP_NOFS); 946 if (!newval) 947 goto out; 948 } 949 950 xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS); 951 if (!xattr) 952 goto out; 953 954 prealloc_cf = ceph_alloc_cap_flush(); 955 if (!prealloc_cf) 956 goto out; 957 958 spin_lock(&ci->i_ceph_lock); 959 retry: 960 issued = __ceph_caps_issued(ci, NULL); 961 if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) 962 goto do_sync; 963 964 if (!lock_snap_rwsem && !ci->i_head_snapc) { 965 lock_snap_rwsem = true; 966 if (!down_read_trylock(&mdsc->snap_rwsem)) { 967 spin_unlock(&ci->i_ceph_lock); 968 down_read(&mdsc->snap_rwsem); 969 spin_lock(&ci->i_ceph_lock); 970 goto retry; 971 } 972 } 973 974 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); 975 __build_xattrs(inode); 976 977 required_blob_size = __get_required_blob_size(ci, name_len, val_len); 978 979 if (!ci->i_xattrs.prealloc_blob || 980 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { 981 struct ceph_buffer *blob; 982 983 spin_unlock(&ci->i_ceph_lock); 984 dout(" preaallocating new blob size=%d\n", required_blob_size); 985 blob = ceph_buffer_new(required_blob_size, GFP_NOFS); 986 if (!blob) 987 goto do_sync_unlocked; 988 spin_lock(&ci->i_ceph_lock); 989 if (ci->i_xattrs.prealloc_blob) 990 ceph_buffer_put(ci->i_xattrs.prealloc_blob); 991 ci->i_xattrs.prealloc_blob = blob; 992 goto retry; 993 } 994 995 err = __set_xattr(ci, newname, name_len, newval, val_len, 996 flags, value ? 1 : -1, &xattr); 997 998 if (!err) { 999 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, 1000 &prealloc_cf); 1001 ci->i_xattrs.dirty = true; 1002 inode->i_ctime = CURRENT_TIME; 1003 } 1004 1005 spin_unlock(&ci->i_ceph_lock); 1006 if (lock_snap_rwsem) 1007 up_read(&mdsc->snap_rwsem); 1008 if (dirty) 1009 __mark_inode_dirty(inode, dirty); 1010 ceph_free_cap_flush(prealloc_cf); 1011 return err; 1012 1013 do_sync: 1014 spin_unlock(&ci->i_ceph_lock); 1015 do_sync_unlocked: 1016 if (lock_snap_rwsem) 1017 up_read(&mdsc->snap_rwsem); 1018 err = ceph_sync_setxattr(dentry, name, value, size, flags); 1019 out: 1020 ceph_free_cap_flush(prealloc_cf); 1021 kfree(newname); 1022 kfree(newval); 1023 kfree(xattr); 1024 return err; 1025 } 1026 1027 int ceph_setxattr(struct dentry *dentry, const char *name, 1028 const void *value, size_t size, int flags) 1029 { 1030 if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP) 1031 return -EROFS; 1032 1033 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) 1034 return generic_setxattr(dentry, name, value, size, flags); 1035 1036 if (size == 0) 1037 value = ""; /* empty EA, do not remove */ 1038 1039 return __ceph_setxattr(dentry, name, value, size, flags); 1040 } 1041 1042 static int ceph_send_removexattr(struct dentry *dentry, const char *name) 1043 { 1044 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 1045 struct ceph_mds_client *mdsc = fsc->mdsc; 1046 struct inode *inode = d_inode(dentry); 1047 struct ceph_mds_request *req; 1048 int err; 1049 1050 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR, 1051 USE_AUTH_MDS); 1052 if (IS_ERR(req)) 1053 return PTR_ERR(req); 1054 req->r_path2 = kstrdup(name, GFP_NOFS); 1055 if (!req->r_path2) 1056 return -ENOMEM; 1057 1058 req->r_inode = inode; 1059 ihold(inode); 1060 req->r_num_caps = 1; 1061 req->r_inode_drop = CEPH_CAP_XATTR_SHARED; 1062 err = ceph_mdsc_do_request(mdsc, NULL, req); 1063 ceph_mdsc_put_request(req); 1064 return err; 1065 } 1066 1067 int __ceph_removexattr(struct dentry *dentry, const char *name) 1068 { 1069 struct inode *inode = d_inode(dentry); 1070 struct ceph_vxattr *vxattr; 1071 struct ceph_inode_info *ci = ceph_inode(inode); 1072 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; 1073 struct ceph_cap_flush *prealloc_cf = NULL; 1074 int issued; 1075 int err; 1076 int required_blob_size; 1077 int dirty; 1078 bool lock_snap_rwsem = false; 1079 1080 if (!ceph_is_valid_xattr(name)) 1081 return -EOPNOTSUPP; 1082 1083 vxattr = ceph_match_vxattr(inode, name); 1084 if (vxattr && vxattr->readonly) 1085 return -EOPNOTSUPP; 1086 1087 /* pass any unhandled ceph.* xattrs through to the MDS */ 1088 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) 1089 goto do_sync_unlocked; 1090 1091 prealloc_cf = ceph_alloc_cap_flush(); 1092 if (!prealloc_cf) 1093 return -ENOMEM; 1094 1095 err = -ENOMEM; 1096 spin_lock(&ci->i_ceph_lock); 1097 retry: 1098 issued = __ceph_caps_issued(ci, NULL); 1099 if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) 1100 goto do_sync; 1101 1102 if (!lock_snap_rwsem && !ci->i_head_snapc) { 1103 lock_snap_rwsem = true; 1104 if (!down_read_trylock(&mdsc->snap_rwsem)) { 1105 spin_unlock(&ci->i_ceph_lock); 1106 down_read(&mdsc->snap_rwsem); 1107 spin_lock(&ci->i_ceph_lock); 1108 goto retry; 1109 } 1110 } 1111 1112 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); 1113 1114 __build_xattrs(inode); 1115 1116 required_blob_size = __get_required_blob_size(ci, 0, 0); 1117 1118 if (!ci->i_xattrs.prealloc_blob || 1119 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { 1120 struct ceph_buffer *blob; 1121 1122 spin_unlock(&ci->i_ceph_lock); 1123 dout(" preaallocating new blob size=%d\n", required_blob_size); 1124 blob = ceph_buffer_new(required_blob_size, GFP_NOFS); 1125 if (!blob) 1126 goto do_sync_unlocked; 1127 spin_lock(&ci->i_ceph_lock); 1128 if (ci->i_xattrs.prealloc_blob) 1129 ceph_buffer_put(ci->i_xattrs.prealloc_blob); 1130 ci->i_xattrs.prealloc_blob = blob; 1131 goto retry; 1132 } 1133 1134 err = __remove_xattr_by_name(ceph_inode(inode), name); 1135 1136 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, 1137 &prealloc_cf); 1138 ci->i_xattrs.dirty = true; 1139 inode->i_ctime = CURRENT_TIME; 1140 spin_unlock(&ci->i_ceph_lock); 1141 if (lock_snap_rwsem) 1142 up_read(&mdsc->snap_rwsem); 1143 if (dirty) 1144 __mark_inode_dirty(inode, dirty); 1145 ceph_free_cap_flush(prealloc_cf); 1146 return err; 1147 do_sync: 1148 spin_unlock(&ci->i_ceph_lock); 1149 do_sync_unlocked: 1150 if (lock_snap_rwsem) 1151 up_read(&mdsc->snap_rwsem); 1152 ceph_free_cap_flush(prealloc_cf); 1153 err = ceph_send_removexattr(dentry, name); 1154 return err; 1155 } 1156 1157 int ceph_removexattr(struct dentry *dentry, const char *name) 1158 { 1159 if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP) 1160 return -EROFS; 1161 1162 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) 1163 return generic_removexattr(dentry, name); 1164 1165 return __ceph_removexattr(dentry, name); 1166 } 1167