1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/ceph/ceph_debug.h> 3 #include <linux/ceph/pagelist.h> 4 5 #include "super.h" 6 #include "mds_client.h" 7 8 #include <linux/ceph/decode.h> 9 10 #include <linux/xattr.h> 11 #include <linux/posix_acl_xattr.h> 12 #include <linux/slab.h> 13 14 #define XATTR_CEPH_PREFIX "ceph." 15 #define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1) 16 17 static int __remove_xattr(struct ceph_inode_info *ci, 18 struct ceph_inode_xattr *xattr); 19 20 static const struct xattr_handler ceph_other_xattr_handler; 21 22 /* 23 * List of handlers for synthetic system.* attributes. Other 24 * attributes are handled directly. 25 */ 26 const struct xattr_handler *ceph_xattr_handlers[] = { 27 #ifdef CONFIG_CEPH_FS_POSIX_ACL 28 &posix_acl_access_xattr_handler, 29 &posix_acl_default_xattr_handler, 30 #endif 31 &ceph_other_xattr_handler, 32 NULL, 33 }; 34 35 static bool ceph_is_valid_xattr(const char *name) 36 { 37 return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) || 38 !strncmp(name, XATTR_SECURITY_PREFIX, 39 XATTR_SECURITY_PREFIX_LEN) || 40 !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || 41 !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); 42 } 43 44 /* 45 * These define virtual xattrs exposing the recursive directory 46 * statistics and layout metadata. 47 */ 48 struct ceph_vxattr { 49 char *name; 50 size_t name_size; /* strlen(name) + 1 (for '\0') */ 51 size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, 52 size_t size); 53 bool readonly, hidden; 54 bool (*exists_cb)(struct ceph_inode_info *ci); 55 }; 56 57 /* layouts */ 58 59 static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) 60 { 61 struct ceph_file_layout *fl = &ci->i_layout; 62 return (fl->stripe_unit > 0 || fl->stripe_count > 0 || 63 fl->object_size > 0 || fl->pool_id >= 0 || 64 rcu_dereference_raw(fl->pool_ns) != NULL); 65 } 66 67 static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, 68 size_t size) 69 { 70 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); 71 struct ceph_osd_client *osdc = &fsc->client->osdc; 72 struct ceph_string *pool_ns; 73 s64 pool = ci->i_layout.pool_id; 74 const char *pool_name; 75 const char *ns_field = " pool_namespace="; 76 char buf[128]; 77 size_t len, total_len = 0; 78 int ret; 79 80 pool_ns = ceph_try_get_string(ci->i_layout.pool_ns); 81 82 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); 83 down_read(&osdc->lock); 84 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); 85 if (pool_name) { 86 len = snprintf(buf, sizeof(buf), 87 "stripe_unit=%u stripe_count=%u object_size=%u pool=", 88 ci->i_layout.stripe_unit, ci->i_layout.stripe_count, 89 ci->i_layout.object_size); 90 total_len = len + strlen(pool_name); 91 } else { 92 len = snprintf(buf, sizeof(buf), 93 "stripe_unit=%u stripe_count=%u object_size=%u pool=%lld", 94 ci->i_layout.stripe_unit, ci->i_layout.stripe_count, 95 ci->i_layout.object_size, (unsigned long long)pool); 96 total_len = len; 97 } 98 99 if (pool_ns) 100 total_len += strlen(ns_field) + pool_ns->len; 101 102 if (!size) { 103 ret = total_len; 104 } else if (total_len > size) { 105 ret = -ERANGE; 106 } else { 107 memcpy(val, buf, len); 108 ret = len; 109 if (pool_name) { 110 len = strlen(pool_name); 111 memcpy(val + ret, pool_name, len); 112 ret += len; 113 } 114 if (pool_ns) { 115 len = strlen(ns_field); 116 memcpy(val + ret, ns_field, len); 117 ret += len; 118 memcpy(val + ret, pool_ns->str, pool_ns->len); 119 ret += pool_ns->len; 120 } 121 } 122 up_read(&osdc->lock); 123 ceph_put_string(pool_ns); 124 return ret; 125 } 126 127 static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci, 128 char *val, size_t size) 129 { 130 return snprintf(val, size, "%u", ci->i_layout.stripe_unit); 131 } 132 133 static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci, 134 char *val, size_t size) 135 { 136 return snprintf(val, size, "%u", ci->i_layout.stripe_count); 137 } 138 139 static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci, 140 char *val, size_t size) 141 { 142 return snprintf(val, size, "%u", ci->i_layout.object_size); 143 } 144 145 static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, 146 char *val, size_t size) 147 { 148 int ret; 149 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); 150 struct ceph_osd_client *osdc = &fsc->client->osdc; 151 s64 pool = ci->i_layout.pool_id; 152 const char *pool_name; 153 154 down_read(&osdc->lock); 155 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); 156 if (pool_name) 157 ret = snprintf(val, size, "%s", pool_name); 158 else 159 ret = snprintf(val, size, "%lld", (unsigned long long)pool); 160 up_read(&osdc->lock); 161 return ret; 162 } 163 164 static size_t ceph_vxattrcb_layout_pool_namespace(struct ceph_inode_info *ci, 165 char *val, size_t size) 166 { 167 int ret = 0; 168 struct ceph_string *ns = ceph_try_get_string(ci->i_layout.pool_ns); 169 if (ns) { 170 ret = snprintf(val, size, "%.*s", (int)ns->len, ns->str); 171 ceph_put_string(ns); 172 } 173 return ret; 174 } 175 176 /* directories */ 177 178 static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, 179 size_t size) 180 { 181 return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs); 182 } 183 184 static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val, 185 size_t size) 186 { 187 return snprintf(val, size, "%lld", ci->i_files); 188 } 189 190 static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val, 191 size_t size) 192 { 193 return snprintf(val, size, "%lld", ci->i_subdirs); 194 } 195 196 static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val, 197 size_t size) 198 { 199 return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs); 200 } 201 202 static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val, 203 size_t size) 204 { 205 return snprintf(val, size, "%lld", ci->i_rfiles); 206 } 207 208 static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val, 209 size_t size) 210 { 211 return snprintf(val, size, "%lld", ci->i_rsubdirs); 212 } 213 214 static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val, 215 size_t size) 216 { 217 return snprintf(val, size, "%lld", ci->i_rbytes); 218 } 219 220 static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, 221 size_t size) 222 { 223 return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec, 224 (long)ci->i_rctime.tv_nsec); 225 } 226 227 228 #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name 229 #define CEPH_XATTR_NAME2(_type, _name, _name2) \ 230 XATTR_CEPH_PREFIX #_type "." #_name "." #_name2 231 232 #define XATTR_NAME_CEPH(_type, _name) \ 233 { \ 234 .name = CEPH_XATTR_NAME(_type, _name), \ 235 .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ 236 .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ 237 .readonly = true, \ 238 .hidden = false, \ 239 .exists_cb = NULL, \ 240 } 241 #define XATTR_LAYOUT_FIELD(_type, _name, _field) \ 242 { \ 243 .name = CEPH_XATTR_NAME2(_type, _name, _field), \ 244 .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \ 245 .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \ 246 .readonly = false, \ 247 .hidden = true, \ 248 .exists_cb = ceph_vxattrcb_layout_exists, \ 249 } 250 251 static struct ceph_vxattr ceph_dir_vxattrs[] = { 252 { 253 .name = "ceph.dir.layout", 254 .name_size = sizeof("ceph.dir.layout"), 255 .getxattr_cb = ceph_vxattrcb_layout, 256 .readonly = false, 257 .hidden = true, 258 .exists_cb = ceph_vxattrcb_layout_exists, 259 }, 260 XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), 261 XATTR_LAYOUT_FIELD(dir, layout, stripe_count), 262 XATTR_LAYOUT_FIELD(dir, layout, object_size), 263 XATTR_LAYOUT_FIELD(dir, layout, pool), 264 XATTR_LAYOUT_FIELD(dir, layout, pool_namespace), 265 XATTR_NAME_CEPH(dir, entries), 266 XATTR_NAME_CEPH(dir, files), 267 XATTR_NAME_CEPH(dir, subdirs), 268 XATTR_NAME_CEPH(dir, rentries), 269 XATTR_NAME_CEPH(dir, rfiles), 270 XATTR_NAME_CEPH(dir, rsubdirs), 271 XATTR_NAME_CEPH(dir, rbytes), 272 XATTR_NAME_CEPH(dir, rctime), 273 { .name = NULL, 0 } /* Required table terminator */ 274 }; 275 static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ 276 277 /* files */ 278 279 static struct ceph_vxattr ceph_file_vxattrs[] = { 280 { 281 .name = "ceph.file.layout", 282 .name_size = sizeof("ceph.file.layout"), 283 .getxattr_cb = ceph_vxattrcb_layout, 284 .readonly = false, 285 .hidden = true, 286 .exists_cb = ceph_vxattrcb_layout_exists, 287 }, 288 XATTR_LAYOUT_FIELD(file, layout, stripe_unit), 289 XATTR_LAYOUT_FIELD(file, layout, stripe_count), 290 XATTR_LAYOUT_FIELD(file, layout, object_size), 291 XATTR_LAYOUT_FIELD(file, layout, pool), 292 XATTR_LAYOUT_FIELD(file, layout, pool_namespace), 293 { .name = NULL, 0 } /* Required table terminator */ 294 }; 295 static size_t ceph_file_vxattrs_name_size; /* total size of all names */ 296 297 static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode) 298 { 299 if (S_ISDIR(inode->i_mode)) 300 return ceph_dir_vxattrs; 301 else if (S_ISREG(inode->i_mode)) 302 return ceph_file_vxattrs; 303 return NULL; 304 } 305 306 static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs) 307 { 308 if (vxattrs == ceph_dir_vxattrs) 309 return ceph_dir_vxattrs_name_size; 310 if (vxattrs == ceph_file_vxattrs) 311 return ceph_file_vxattrs_name_size; 312 BUG_ON(vxattrs); 313 return 0; 314 } 315 316 /* 317 * Compute the aggregate size (including terminating '\0') of all 318 * virtual extended attribute names in the given vxattr table. 319 */ 320 static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs) 321 { 322 struct ceph_vxattr *vxattr; 323 size_t size = 0; 324 325 for (vxattr = vxattrs; vxattr->name; vxattr++) 326 if (!vxattr->hidden) 327 size += vxattr->name_size; 328 329 return size; 330 } 331 332 /* Routines called at initialization and exit time */ 333 334 void __init ceph_xattr_init(void) 335 { 336 ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs); 337 ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs); 338 } 339 340 void ceph_xattr_exit(void) 341 { 342 ceph_dir_vxattrs_name_size = 0; 343 ceph_file_vxattrs_name_size = 0; 344 } 345 346 static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode, 347 const char *name) 348 { 349 struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode); 350 351 if (vxattr) { 352 while (vxattr->name) { 353 if (!strcmp(vxattr->name, name)) 354 return vxattr; 355 vxattr++; 356 } 357 } 358 359 return NULL; 360 } 361 362 static int __set_xattr(struct ceph_inode_info *ci, 363 const char *name, int name_len, 364 const char *val, int val_len, 365 int flags, int update_xattr, 366 struct ceph_inode_xattr **newxattr) 367 { 368 struct rb_node **p; 369 struct rb_node *parent = NULL; 370 struct ceph_inode_xattr *xattr = NULL; 371 int c; 372 int new = 0; 373 374 p = &ci->i_xattrs.index.rb_node; 375 while (*p) { 376 parent = *p; 377 xattr = rb_entry(parent, struct ceph_inode_xattr, node); 378 c = strncmp(name, xattr->name, min(name_len, xattr->name_len)); 379 if (c < 0) 380 p = &(*p)->rb_left; 381 else if (c > 0) 382 p = &(*p)->rb_right; 383 else { 384 if (name_len == xattr->name_len) 385 break; 386 else if (name_len < xattr->name_len) 387 p = &(*p)->rb_left; 388 else 389 p = &(*p)->rb_right; 390 } 391 xattr = NULL; 392 } 393 394 if (update_xattr) { 395 int err = 0; 396 397 if (xattr && (flags & XATTR_CREATE)) 398 err = -EEXIST; 399 else if (!xattr && (flags & XATTR_REPLACE)) 400 err = -ENODATA; 401 if (err) { 402 kfree(name); 403 kfree(val); 404 kfree(*newxattr); 405 return err; 406 } 407 if (update_xattr < 0) { 408 if (xattr) 409 __remove_xattr(ci, xattr); 410 kfree(name); 411 kfree(*newxattr); 412 return 0; 413 } 414 } 415 416 if (!xattr) { 417 new = 1; 418 xattr = *newxattr; 419 xattr->name = name; 420 xattr->name_len = name_len; 421 xattr->should_free_name = update_xattr; 422 423 ci->i_xattrs.count++; 424 dout("__set_xattr count=%d\n", ci->i_xattrs.count); 425 } else { 426 kfree(*newxattr); 427 *newxattr = NULL; 428 if (xattr->should_free_val) 429 kfree((void *)xattr->val); 430 431 if (update_xattr) { 432 kfree((void *)name); 433 name = xattr->name; 434 } 435 ci->i_xattrs.names_size -= xattr->name_len; 436 ci->i_xattrs.vals_size -= xattr->val_len; 437 } 438 ci->i_xattrs.names_size += name_len; 439 ci->i_xattrs.vals_size += val_len; 440 if (val) 441 xattr->val = val; 442 else 443 xattr->val = ""; 444 445 xattr->val_len = val_len; 446 xattr->dirty = update_xattr; 447 xattr->should_free_val = (val && update_xattr); 448 449 if (new) { 450 rb_link_node(&xattr->node, parent, p); 451 rb_insert_color(&xattr->node, &ci->i_xattrs.index); 452 dout("__set_xattr_val p=%p\n", p); 453 } 454 455 dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n", 456 ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val); 457 458 return 0; 459 } 460 461 static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, 462 const char *name) 463 { 464 struct rb_node **p; 465 struct rb_node *parent = NULL; 466 struct ceph_inode_xattr *xattr = NULL; 467 int name_len = strlen(name); 468 int c; 469 470 p = &ci->i_xattrs.index.rb_node; 471 while (*p) { 472 parent = *p; 473 xattr = rb_entry(parent, struct ceph_inode_xattr, node); 474 c = strncmp(name, xattr->name, xattr->name_len); 475 if (c == 0 && name_len > xattr->name_len) 476 c = 1; 477 if (c < 0) 478 p = &(*p)->rb_left; 479 else if (c > 0) 480 p = &(*p)->rb_right; 481 else { 482 dout("__get_xattr %s: found %.*s\n", name, 483 xattr->val_len, xattr->val); 484 return xattr; 485 } 486 } 487 488 dout("__get_xattr %s: not found\n", name); 489 490 return NULL; 491 } 492 493 static void __free_xattr(struct ceph_inode_xattr *xattr) 494 { 495 BUG_ON(!xattr); 496 497 if (xattr->should_free_name) 498 kfree((void *)xattr->name); 499 if (xattr->should_free_val) 500 kfree((void *)xattr->val); 501 502 kfree(xattr); 503 } 504 505 static int __remove_xattr(struct ceph_inode_info *ci, 506 struct ceph_inode_xattr *xattr) 507 { 508 if (!xattr) 509 return -ENODATA; 510 511 rb_erase(&xattr->node, &ci->i_xattrs.index); 512 513 if (xattr->should_free_name) 514 kfree((void *)xattr->name); 515 if (xattr->should_free_val) 516 kfree((void *)xattr->val); 517 518 ci->i_xattrs.names_size -= xattr->name_len; 519 ci->i_xattrs.vals_size -= xattr->val_len; 520 ci->i_xattrs.count--; 521 kfree(xattr); 522 523 return 0; 524 } 525 526 static char *__copy_xattr_names(struct ceph_inode_info *ci, 527 char *dest) 528 { 529 struct rb_node *p; 530 struct ceph_inode_xattr *xattr = NULL; 531 532 p = rb_first(&ci->i_xattrs.index); 533 dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count); 534 535 while (p) { 536 xattr = rb_entry(p, struct ceph_inode_xattr, node); 537 memcpy(dest, xattr->name, xattr->name_len); 538 dest[xattr->name_len] = '\0'; 539 540 dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name, 541 xattr->name_len, ci->i_xattrs.names_size); 542 543 dest += xattr->name_len + 1; 544 p = rb_next(p); 545 } 546 547 return dest; 548 } 549 550 void __ceph_destroy_xattrs(struct ceph_inode_info *ci) 551 { 552 struct rb_node *p, *tmp; 553 struct ceph_inode_xattr *xattr = NULL; 554 555 p = rb_first(&ci->i_xattrs.index); 556 557 dout("__ceph_destroy_xattrs p=%p\n", p); 558 559 while (p) { 560 xattr = rb_entry(p, struct ceph_inode_xattr, node); 561 tmp = p; 562 p = rb_next(tmp); 563 dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p, 564 xattr->name_len, xattr->name); 565 rb_erase(tmp, &ci->i_xattrs.index); 566 567 __free_xattr(xattr); 568 } 569 570 ci->i_xattrs.names_size = 0; 571 ci->i_xattrs.vals_size = 0; 572 ci->i_xattrs.index_version = 0; 573 ci->i_xattrs.count = 0; 574 ci->i_xattrs.index = RB_ROOT; 575 } 576 577 static int __build_xattrs(struct inode *inode) 578 __releases(ci->i_ceph_lock) 579 __acquires(ci->i_ceph_lock) 580 { 581 u32 namelen; 582 u32 numattr = 0; 583 void *p, *end; 584 u32 len; 585 const char *name, *val; 586 struct ceph_inode_info *ci = ceph_inode(inode); 587 int xattr_version; 588 struct ceph_inode_xattr **xattrs = NULL; 589 int err = 0; 590 int i; 591 592 dout("__build_xattrs() len=%d\n", 593 ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0); 594 595 if (ci->i_xattrs.index_version >= ci->i_xattrs.version) 596 return 0; /* already built */ 597 598 __ceph_destroy_xattrs(ci); 599 600 start: 601 /* updated internal xattr rb tree */ 602 if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) { 603 p = ci->i_xattrs.blob->vec.iov_base; 604 end = p + ci->i_xattrs.blob->vec.iov_len; 605 ceph_decode_32_safe(&p, end, numattr, bad); 606 xattr_version = ci->i_xattrs.version; 607 spin_unlock(&ci->i_ceph_lock); 608 609 xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *), 610 GFP_NOFS); 611 err = -ENOMEM; 612 if (!xattrs) 613 goto bad_lock; 614 615 for (i = 0; i < numattr; i++) { 616 xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr), 617 GFP_NOFS); 618 if (!xattrs[i]) 619 goto bad_lock; 620 } 621 622 spin_lock(&ci->i_ceph_lock); 623 if (ci->i_xattrs.version != xattr_version) { 624 /* lost a race, retry */ 625 for (i = 0; i < numattr; i++) 626 kfree(xattrs[i]); 627 kfree(xattrs); 628 xattrs = NULL; 629 goto start; 630 } 631 err = -EIO; 632 while (numattr--) { 633 ceph_decode_32_safe(&p, end, len, bad); 634 namelen = len; 635 name = p; 636 p += len; 637 ceph_decode_32_safe(&p, end, len, bad); 638 val = p; 639 p += len; 640 641 err = __set_xattr(ci, name, namelen, val, len, 642 0, 0, &xattrs[numattr]); 643 644 if (err < 0) 645 goto bad; 646 } 647 kfree(xattrs); 648 } 649 ci->i_xattrs.index_version = ci->i_xattrs.version; 650 ci->i_xattrs.dirty = false; 651 652 return err; 653 bad_lock: 654 spin_lock(&ci->i_ceph_lock); 655 bad: 656 if (xattrs) { 657 for (i = 0; i < numattr; i++) 658 kfree(xattrs[i]); 659 kfree(xattrs); 660 } 661 ci->i_xattrs.names_size = 0; 662 return err; 663 } 664 665 static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, 666 int val_size) 667 { 668 /* 669 * 4 bytes for the length, and additional 4 bytes per each xattr name, 670 * 4 bytes per each value 671 */ 672 int size = 4 + ci->i_xattrs.count*(4 + 4) + 673 ci->i_xattrs.names_size + 674 ci->i_xattrs.vals_size; 675 dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n", 676 ci->i_xattrs.count, ci->i_xattrs.names_size, 677 ci->i_xattrs.vals_size); 678 679 if (name_size) 680 size += 4 + 4 + name_size + val_size; 681 682 return size; 683 } 684 685 /* 686 * If there are dirty xattrs, reencode xattrs into the prealloc_blob 687 * and swap into place. 688 */ 689 void __ceph_build_xattrs_blob(struct ceph_inode_info *ci) 690 { 691 struct rb_node *p; 692 struct ceph_inode_xattr *xattr = NULL; 693 void *dest; 694 695 dout("__build_xattrs_blob %p\n", &ci->vfs_inode); 696 if (ci->i_xattrs.dirty) { 697 int need = __get_required_blob_size(ci, 0, 0); 698 699 BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len); 700 701 p = rb_first(&ci->i_xattrs.index); 702 dest = ci->i_xattrs.prealloc_blob->vec.iov_base; 703 704 ceph_encode_32(&dest, ci->i_xattrs.count); 705 while (p) { 706 xattr = rb_entry(p, struct ceph_inode_xattr, node); 707 708 ceph_encode_32(&dest, xattr->name_len); 709 memcpy(dest, xattr->name, xattr->name_len); 710 dest += xattr->name_len; 711 ceph_encode_32(&dest, xattr->val_len); 712 memcpy(dest, xattr->val, xattr->val_len); 713 dest += xattr->val_len; 714 715 p = rb_next(p); 716 } 717 718 /* adjust buffer len; it may be larger than we need */ 719 ci->i_xattrs.prealloc_blob->vec.iov_len = 720 dest - ci->i_xattrs.prealloc_blob->vec.iov_base; 721 722 if (ci->i_xattrs.blob) 723 ceph_buffer_put(ci->i_xattrs.blob); 724 ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; 725 ci->i_xattrs.prealloc_blob = NULL; 726 ci->i_xattrs.dirty = false; 727 ci->i_xattrs.version++; 728 } 729 } 730 731 static inline int __get_request_mask(struct inode *in) { 732 struct ceph_mds_request *req = current->journal_info; 733 int mask = 0; 734 if (req && req->r_target_inode == in) { 735 if (req->r_op == CEPH_MDS_OP_LOOKUP || 736 req->r_op == CEPH_MDS_OP_LOOKUPINO || 737 req->r_op == CEPH_MDS_OP_LOOKUPPARENT || 738 req->r_op == CEPH_MDS_OP_GETATTR) { 739 mask = le32_to_cpu(req->r_args.getattr.mask); 740 } else if (req->r_op == CEPH_MDS_OP_OPEN || 741 req->r_op == CEPH_MDS_OP_CREATE) { 742 mask = le32_to_cpu(req->r_args.open.mask); 743 } 744 } 745 return mask; 746 } 747 748 ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, 749 size_t size) 750 { 751 struct ceph_inode_info *ci = ceph_inode(inode); 752 struct ceph_inode_xattr *xattr; 753 struct ceph_vxattr *vxattr = NULL; 754 int req_mask; 755 int err; 756 757 /* let's see if a virtual xattr was requested */ 758 vxattr = ceph_match_vxattr(inode, name); 759 if (vxattr) { 760 err = ceph_do_getattr(inode, 0, true); 761 if (err) 762 return err; 763 err = -ENODATA; 764 if (!(vxattr->exists_cb && !vxattr->exists_cb(ci))) 765 err = vxattr->getxattr_cb(ci, value, size); 766 return err; 767 } 768 769 req_mask = __get_request_mask(inode); 770 771 spin_lock(&ci->i_ceph_lock); 772 dout("getxattr %p ver=%lld index_ver=%lld\n", inode, 773 ci->i_xattrs.version, ci->i_xattrs.index_version); 774 775 if (ci->i_xattrs.version == 0 || 776 !((req_mask & CEPH_CAP_XATTR_SHARED) || 777 __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) { 778 spin_unlock(&ci->i_ceph_lock); 779 780 /* security module gets xattr while filling trace */ 781 if (current->journal_info) { 782 pr_warn_ratelimited("sync getxattr %p " 783 "during filling trace\n", inode); 784 return -EBUSY; 785 } 786 787 /* get xattrs from mds (if we don't already have them) */ 788 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true); 789 if (err) 790 return err; 791 spin_lock(&ci->i_ceph_lock); 792 } 793 794 err = __build_xattrs(inode); 795 if (err < 0) 796 goto out; 797 798 err = -ENODATA; /* == ENOATTR */ 799 xattr = __get_xattr(ci, name); 800 if (!xattr) 801 goto out; 802 803 err = -ERANGE; 804 if (size && size < xattr->val_len) 805 goto out; 806 807 err = xattr->val_len; 808 if (size == 0) 809 goto out; 810 811 memcpy(value, xattr->val, xattr->val_len); 812 813 if (current->journal_info && 814 !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) 815 ci->i_ceph_flags |= CEPH_I_SEC_INITED; 816 out: 817 spin_unlock(&ci->i_ceph_lock); 818 return err; 819 } 820 821 ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) 822 { 823 struct inode *inode = d_inode(dentry); 824 struct ceph_inode_info *ci = ceph_inode(inode); 825 struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode); 826 u32 vir_namelen = 0; 827 u32 namelen; 828 int err; 829 u32 len; 830 int i; 831 832 spin_lock(&ci->i_ceph_lock); 833 dout("listxattr %p ver=%lld index_ver=%lld\n", inode, 834 ci->i_xattrs.version, ci->i_xattrs.index_version); 835 836 if (ci->i_xattrs.version == 0 || 837 !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) { 838 spin_unlock(&ci->i_ceph_lock); 839 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true); 840 if (err) 841 return err; 842 spin_lock(&ci->i_ceph_lock); 843 } 844 845 err = __build_xattrs(inode); 846 if (err < 0) 847 goto out; 848 /* 849 * Start with virtual dir xattr names (if any) (including 850 * terminating '\0' characters for each). 851 */ 852 vir_namelen = ceph_vxattrs_name_size(vxattrs); 853 854 /* adding 1 byte per each variable due to the null termination */ 855 namelen = ci->i_xattrs.names_size + ci->i_xattrs.count; 856 err = -ERANGE; 857 if (size && vir_namelen + namelen > size) 858 goto out; 859 860 err = namelen + vir_namelen; 861 if (size == 0) 862 goto out; 863 864 names = __copy_xattr_names(ci, names); 865 866 /* virtual xattr names, too */ 867 err = namelen; 868 if (vxattrs) { 869 for (i = 0; vxattrs[i].name; i++) { 870 if (!vxattrs[i].hidden && 871 !(vxattrs[i].exists_cb && 872 !vxattrs[i].exists_cb(ci))) { 873 len = sprintf(names, "%s", vxattrs[i].name); 874 names += len + 1; 875 err += len + 1; 876 } 877 } 878 } 879 880 out: 881 spin_unlock(&ci->i_ceph_lock); 882 return err; 883 } 884 885 static int ceph_sync_setxattr(struct inode *inode, const char *name, 886 const char *value, size_t size, int flags) 887 { 888 struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); 889 struct ceph_inode_info *ci = ceph_inode(inode); 890 struct ceph_mds_request *req; 891 struct ceph_mds_client *mdsc = fsc->mdsc; 892 struct ceph_pagelist *pagelist = NULL; 893 int op = CEPH_MDS_OP_SETXATTR; 894 int err; 895 896 if (size > 0) { 897 /* copy value into pagelist */ 898 pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); 899 if (!pagelist) 900 return -ENOMEM; 901 902 ceph_pagelist_init(pagelist); 903 err = ceph_pagelist_append(pagelist, value, size); 904 if (err) 905 goto out; 906 } else if (!value) { 907 if (flags & CEPH_XATTR_REPLACE) 908 op = CEPH_MDS_OP_RMXATTR; 909 else 910 flags |= CEPH_XATTR_REMOVE; 911 } 912 913 dout("setxattr value=%.*s\n", (int)size, value); 914 915 /* do request */ 916 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 917 if (IS_ERR(req)) { 918 err = PTR_ERR(req); 919 goto out; 920 } 921 922 req->r_path2 = kstrdup(name, GFP_NOFS); 923 if (!req->r_path2) { 924 ceph_mdsc_put_request(req); 925 err = -ENOMEM; 926 goto out; 927 } 928 929 if (op == CEPH_MDS_OP_SETXATTR) { 930 req->r_args.setxattr.flags = cpu_to_le32(flags); 931 req->r_pagelist = pagelist; 932 pagelist = NULL; 933 } 934 935 req->r_inode = inode; 936 ihold(inode); 937 req->r_num_caps = 1; 938 req->r_inode_drop = CEPH_CAP_XATTR_SHARED; 939 940 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); 941 err = ceph_mdsc_do_request(mdsc, NULL, req); 942 ceph_mdsc_put_request(req); 943 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); 944 945 out: 946 if (pagelist) 947 ceph_pagelist_release(pagelist); 948 return err; 949 } 950 951 int __ceph_setxattr(struct inode *inode, const char *name, 952 const void *value, size_t size, int flags) 953 { 954 struct ceph_vxattr *vxattr; 955 struct ceph_inode_info *ci = ceph_inode(inode); 956 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 957 struct ceph_cap_flush *prealloc_cf = NULL; 958 int issued; 959 int err; 960 int dirty = 0; 961 int name_len = strlen(name); 962 int val_len = size; 963 char *newname = NULL; 964 char *newval = NULL; 965 struct ceph_inode_xattr *xattr = NULL; 966 int required_blob_size; 967 bool lock_snap_rwsem = false; 968 969 if (ceph_snap(inode) != CEPH_NOSNAP) 970 return -EROFS; 971 972 vxattr = ceph_match_vxattr(inode, name); 973 if (vxattr && vxattr->readonly) 974 return -EOPNOTSUPP; 975 976 /* pass any unhandled ceph.* xattrs through to the MDS */ 977 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) 978 goto do_sync_unlocked; 979 980 /* preallocate memory for xattr name, value, index node */ 981 err = -ENOMEM; 982 newname = kmemdup(name, name_len + 1, GFP_NOFS); 983 if (!newname) 984 goto out; 985 986 if (val_len) { 987 newval = kmemdup(value, val_len, GFP_NOFS); 988 if (!newval) 989 goto out; 990 } 991 992 xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS); 993 if (!xattr) 994 goto out; 995 996 prealloc_cf = ceph_alloc_cap_flush(); 997 if (!prealloc_cf) 998 goto out; 999 1000 spin_lock(&ci->i_ceph_lock); 1001 retry: 1002 issued = __ceph_caps_issued(ci, NULL); 1003 if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) 1004 goto do_sync; 1005 1006 if (!lock_snap_rwsem && !ci->i_head_snapc) { 1007 lock_snap_rwsem = true; 1008 if (!down_read_trylock(&mdsc->snap_rwsem)) { 1009 spin_unlock(&ci->i_ceph_lock); 1010 down_read(&mdsc->snap_rwsem); 1011 spin_lock(&ci->i_ceph_lock); 1012 goto retry; 1013 } 1014 } 1015 1016 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); 1017 __build_xattrs(inode); 1018 1019 required_blob_size = __get_required_blob_size(ci, name_len, val_len); 1020 1021 if (!ci->i_xattrs.prealloc_blob || 1022 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { 1023 struct ceph_buffer *blob; 1024 1025 spin_unlock(&ci->i_ceph_lock); 1026 dout(" preaallocating new blob size=%d\n", required_blob_size); 1027 blob = ceph_buffer_new(required_blob_size, GFP_NOFS); 1028 if (!blob) 1029 goto do_sync_unlocked; 1030 spin_lock(&ci->i_ceph_lock); 1031 if (ci->i_xattrs.prealloc_blob) 1032 ceph_buffer_put(ci->i_xattrs.prealloc_blob); 1033 ci->i_xattrs.prealloc_blob = blob; 1034 goto retry; 1035 } 1036 1037 err = __set_xattr(ci, newname, name_len, newval, val_len, 1038 flags, value ? 1 : -1, &xattr); 1039 1040 if (!err) { 1041 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, 1042 &prealloc_cf); 1043 ci->i_xattrs.dirty = true; 1044 inode->i_ctime = current_time(inode); 1045 } 1046 1047 spin_unlock(&ci->i_ceph_lock); 1048 if (lock_snap_rwsem) 1049 up_read(&mdsc->snap_rwsem); 1050 if (dirty) 1051 __mark_inode_dirty(inode, dirty); 1052 ceph_free_cap_flush(prealloc_cf); 1053 return err; 1054 1055 do_sync: 1056 spin_unlock(&ci->i_ceph_lock); 1057 do_sync_unlocked: 1058 if (lock_snap_rwsem) 1059 up_read(&mdsc->snap_rwsem); 1060 1061 /* security module set xattr while filling trace */ 1062 if (current->journal_info) { 1063 pr_warn_ratelimited("sync setxattr %p " 1064 "during filling trace\n", inode); 1065 err = -EBUSY; 1066 } else { 1067 err = ceph_sync_setxattr(inode, name, value, size, flags); 1068 } 1069 out: 1070 ceph_free_cap_flush(prealloc_cf); 1071 kfree(newname); 1072 kfree(newval); 1073 kfree(xattr); 1074 return err; 1075 } 1076 1077 static int ceph_get_xattr_handler(const struct xattr_handler *handler, 1078 struct dentry *dentry, struct inode *inode, 1079 const char *name, void *value, size_t size) 1080 { 1081 if (!ceph_is_valid_xattr(name)) 1082 return -EOPNOTSUPP; 1083 return __ceph_getxattr(inode, name, value, size); 1084 } 1085 1086 static int ceph_set_xattr_handler(const struct xattr_handler *handler, 1087 struct dentry *unused, struct inode *inode, 1088 const char *name, const void *value, 1089 size_t size, int flags) 1090 { 1091 if (!ceph_is_valid_xattr(name)) 1092 return -EOPNOTSUPP; 1093 return __ceph_setxattr(inode, name, value, size, flags); 1094 } 1095 1096 static const struct xattr_handler ceph_other_xattr_handler = { 1097 .prefix = "", /* match any name => handlers called with full name */ 1098 .get = ceph_get_xattr_handler, 1099 .set = ceph_set_xattr_handler, 1100 }; 1101 1102 #ifdef CONFIG_SECURITY 1103 bool ceph_security_xattr_wanted(struct inode *in) 1104 { 1105 return in->i_security != NULL; 1106 } 1107 1108 bool ceph_security_xattr_deadlock(struct inode *in) 1109 { 1110 struct ceph_inode_info *ci; 1111 bool ret; 1112 if (!in->i_security) 1113 return false; 1114 ci = ceph_inode(in); 1115 spin_lock(&ci->i_ceph_lock); 1116 ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) && 1117 !(ci->i_xattrs.version > 0 && 1118 __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0)); 1119 spin_unlock(&ci->i_ceph_lock); 1120 return ret; 1121 } 1122 #endif 1123