1 #include <linux/ceph/ceph_debug.h> 2 3 #include <linux/spinlock.h> 4 #include <linux/fs_struct.h> 5 #include <linux/namei.h> 6 #include <linux/slab.h> 7 #include <linux/sched.h> 8 9 #include "super.h" 10 #include "mds_client.h" 11 12 /* 13 * Directory operations: readdir, lookup, create, link, unlink, 14 * rename, etc. 15 */ 16 17 /* 18 * Ceph MDS operations are specified in terms of a base ino and 19 * relative path. Thus, the client can specify an operation on a 20 * specific inode (e.g., a getattr due to fstat(2)), or as a path 21 * relative to, say, the root directory. 22 * 23 * Normally, we limit ourselves to strict inode ops (no path component) 24 * or dentry operations (a single path component relative to an ino). The 25 * exception to this is open_root_dentry(), which will open the mount 26 * point by name. 27 */ 28 29 const struct dentry_operations ceph_dentry_ops; 30 31 /* 32 * Initialize ceph dentry state. 33 */ 34 int ceph_init_dentry(struct dentry *dentry) 35 { 36 struct ceph_dentry_info *di; 37 38 if (dentry->d_fsdata) 39 return 0; 40 41 di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL); 42 if (!di) 43 return -ENOMEM; /* oh well */ 44 45 spin_lock(&dentry->d_lock); 46 if (dentry->d_fsdata) { 47 /* lost a race */ 48 kmem_cache_free(ceph_dentry_cachep, di); 49 goto out_unlock; 50 } 51 52 if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP) 53 d_set_d_op(dentry, &ceph_dentry_ops); 54 else if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_SNAPDIR) 55 d_set_d_op(dentry, &ceph_snapdir_dentry_ops); 56 else 57 d_set_d_op(dentry, &ceph_snap_dentry_ops); 58 59 di->dentry = dentry; 60 di->lease_session = NULL; 61 dentry->d_time = jiffies; 62 /* avoid reordering d_fsdata setup so that the check above is safe */ 63 smp_mb(); 64 dentry->d_fsdata = di; 65 ceph_dentry_lru_add(dentry); 66 out_unlock: 67 spin_unlock(&dentry->d_lock); 68 return 0; 69 } 70 71 /* 72 * for readdir, we encode the directory frag and offset within that 73 * frag into f_pos. 74 */ 75 static unsigned fpos_frag(loff_t p) 76 { 77 return p >> 32; 78 } 79 static unsigned fpos_off(loff_t p) 80 { 81 return p & 0xffffffff; 82 } 83 84 static int fpos_cmp(loff_t l, loff_t r) 85 { 86 int v = ceph_frag_compare(fpos_frag(l), fpos_frag(r)); 87 if (v) 88 return v; 89 return (int)(fpos_off(l) - fpos_off(r)); 90 } 91 92 /* 93 * make note of the last dentry we read, so we can 94 * continue at the same lexicographical point, 95 * regardless of what dir changes take place on the 96 * server. 97 */ 98 static int note_last_dentry(struct ceph_file_info *fi, const char *name, 99 int len, unsigned next_offset) 100 { 101 char *buf = kmalloc(len+1, GFP_KERNEL); 102 if (!buf) 103 return -ENOMEM; 104 kfree(fi->last_name); 105 fi->last_name = buf; 106 memcpy(fi->last_name, name, len); 107 fi->last_name[len] = 0; 108 fi->next_offset = next_offset; 109 dout("note_last_dentry '%s'\n", fi->last_name); 110 return 0; 111 } 112 113 114 static struct dentry * 115 __dcache_find_get_entry(struct dentry *parent, u64 idx, 116 struct ceph_readdir_cache_control *cache_ctl) 117 { 118 struct inode *dir = d_inode(parent); 119 struct dentry *dentry; 120 unsigned idx_mask = (PAGE_SIZE / sizeof(struct dentry *)) - 1; 121 loff_t ptr_pos = idx * sizeof(struct dentry *); 122 pgoff_t ptr_pgoff = ptr_pos >> PAGE_SHIFT; 123 124 if (ptr_pos >= i_size_read(dir)) 125 return NULL; 126 127 if (!cache_ctl->page || ptr_pgoff != page_index(cache_ctl->page)) { 128 ceph_readdir_cache_release(cache_ctl); 129 cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff); 130 if (!cache_ctl->page) { 131 dout(" page %lu not found\n", ptr_pgoff); 132 return ERR_PTR(-EAGAIN); 133 } 134 /* reading/filling the cache are serialized by 135 i_mutex, no need to use page lock */ 136 unlock_page(cache_ctl->page); 137 cache_ctl->dentries = kmap(cache_ctl->page); 138 } 139 140 cache_ctl->index = idx & idx_mask; 141 142 rcu_read_lock(); 143 spin_lock(&parent->d_lock); 144 /* check i_size again here, because empty directory can be 145 * marked as complete while not holding the i_mutex. */ 146 if (ceph_dir_is_complete_ordered(dir) && ptr_pos < i_size_read(dir)) 147 dentry = cache_ctl->dentries[cache_ctl->index]; 148 else 149 dentry = NULL; 150 spin_unlock(&parent->d_lock); 151 if (dentry && !lockref_get_not_dead(&dentry->d_lockref)) 152 dentry = NULL; 153 rcu_read_unlock(); 154 return dentry ? : ERR_PTR(-EAGAIN); 155 } 156 157 /* 158 * When possible, we try to satisfy a readdir by peeking at the 159 * dcache. We make this work by carefully ordering dentries on 160 * d_child when we initially get results back from the MDS, and 161 * falling back to a "normal" sync readdir if any dentries in the dir 162 * are dropped. 163 * 164 * Complete dir indicates that we have all dentries in the dir. It is 165 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by 166 * the MDS if/when the directory is modified). 167 */ 168 static int __dcache_readdir(struct file *file, struct dir_context *ctx, 169 u32 shared_gen) 170 { 171 struct ceph_file_info *fi = file->private_data; 172 struct dentry *parent = file->f_path.dentry; 173 struct inode *dir = d_inode(parent); 174 struct dentry *dentry, *last = NULL; 175 struct ceph_dentry_info *di; 176 struct ceph_readdir_cache_control cache_ctl = {}; 177 u64 idx = 0; 178 int err = 0; 179 180 dout("__dcache_readdir %p v%u at %llu\n", dir, shared_gen, ctx->pos); 181 182 /* search start position */ 183 if (ctx->pos > 2) { 184 u64 count = div_u64(i_size_read(dir), sizeof(struct dentry *)); 185 while (count > 0) { 186 u64 step = count >> 1; 187 dentry = __dcache_find_get_entry(parent, idx + step, 188 &cache_ctl); 189 if (!dentry) { 190 /* use linar search */ 191 idx = 0; 192 break; 193 } 194 if (IS_ERR(dentry)) { 195 err = PTR_ERR(dentry); 196 goto out; 197 } 198 di = ceph_dentry(dentry); 199 spin_lock(&dentry->d_lock); 200 if (fpos_cmp(di->offset, ctx->pos) < 0) { 201 idx += step + 1; 202 count -= step + 1; 203 } else { 204 count = step; 205 } 206 spin_unlock(&dentry->d_lock); 207 dput(dentry); 208 } 209 210 dout("__dcache_readdir %p cache idx %llu\n", dir, idx); 211 } 212 213 214 for (;;) { 215 bool emit_dentry = false; 216 dentry = __dcache_find_get_entry(parent, idx++, &cache_ctl); 217 if (!dentry) { 218 fi->flags |= CEPH_F_ATEND; 219 err = 0; 220 break; 221 } 222 if (IS_ERR(dentry)) { 223 err = PTR_ERR(dentry); 224 goto out; 225 } 226 227 di = ceph_dentry(dentry); 228 spin_lock(&dentry->d_lock); 229 if (di->lease_shared_gen == shared_gen && 230 d_really_is_positive(dentry) && 231 fpos_cmp(ctx->pos, di->offset) <= 0) { 232 emit_dentry = true; 233 } 234 spin_unlock(&dentry->d_lock); 235 236 if (emit_dentry) { 237 dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos, 238 dentry, dentry, d_inode(dentry)); 239 ctx->pos = di->offset; 240 if (!dir_emit(ctx, dentry->d_name.name, 241 dentry->d_name.len, 242 ceph_translate_ino(dentry->d_sb, 243 d_inode(dentry)->i_ino), 244 d_inode(dentry)->i_mode >> 12)) { 245 dput(dentry); 246 err = 0; 247 break; 248 } 249 ctx->pos++; 250 251 if (last) 252 dput(last); 253 last = dentry; 254 } else { 255 dput(dentry); 256 } 257 } 258 out: 259 ceph_readdir_cache_release(&cache_ctl); 260 if (last) { 261 int ret; 262 di = ceph_dentry(last); 263 ret = note_last_dentry(fi, last->d_name.name, last->d_name.len, 264 fpos_off(di->offset) + 1); 265 if (ret < 0) 266 err = ret; 267 dput(last); 268 } 269 return err; 270 } 271 272 static int ceph_readdir(struct file *file, struct dir_context *ctx) 273 { 274 struct ceph_file_info *fi = file->private_data; 275 struct inode *inode = file_inode(file); 276 struct ceph_inode_info *ci = ceph_inode(inode); 277 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 278 struct ceph_mds_client *mdsc = fsc->mdsc; 279 unsigned frag = fpos_frag(ctx->pos); 280 int off = fpos_off(ctx->pos); 281 int err; 282 u32 ftype; 283 struct ceph_mds_reply_info_parsed *rinfo; 284 285 dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off); 286 if (fi->flags & CEPH_F_ATEND) 287 return 0; 288 289 /* always start with . and .. */ 290 if (ctx->pos == 0) { 291 dout("readdir off 0 -> '.'\n"); 292 if (!dir_emit(ctx, ".", 1, 293 ceph_translate_ino(inode->i_sb, inode->i_ino), 294 inode->i_mode >> 12)) 295 return 0; 296 ctx->pos = 1; 297 off = 1; 298 } 299 if (ctx->pos == 1) { 300 ino_t ino = parent_ino(file->f_path.dentry); 301 dout("readdir off 1 -> '..'\n"); 302 if (!dir_emit(ctx, "..", 2, 303 ceph_translate_ino(inode->i_sb, ino), 304 inode->i_mode >> 12)) 305 return 0; 306 ctx->pos = 2; 307 off = 2; 308 } 309 310 /* can we use the dcache? */ 311 spin_lock(&ci->i_ceph_lock); 312 if (ceph_test_mount_opt(fsc, DCACHE) && 313 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && 314 ceph_snap(inode) != CEPH_SNAPDIR && 315 __ceph_dir_is_complete_ordered(ci) && 316 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { 317 u32 shared_gen = ci->i_shared_gen; 318 spin_unlock(&ci->i_ceph_lock); 319 err = __dcache_readdir(file, ctx, shared_gen); 320 if (err != -EAGAIN) 321 return err; 322 frag = fpos_frag(ctx->pos); 323 off = fpos_off(ctx->pos); 324 } else { 325 spin_unlock(&ci->i_ceph_lock); 326 } 327 328 /* proceed with a normal readdir */ 329 more: 330 /* do we have the correct frag content buffered? */ 331 if (fi->frag != frag || fi->last_readdir == NULL) { 332 struct ceph_mds_request *req; 333 int op = ceph_snap(inode) == CEPH_SNAPDIR ? 334 CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; 335 336 /* discard old result, if any */ 337 if (fi->last_readdir) { 338 ceph_mdsc_put_request(fi->last_readdir); 339 fi->last_readdir = NULL; 340 } 341 342 dout("readdir fetching %llx.%llx frag %x offset '%s'\n", 343 ceph_vinop(inode), frag, fi->last_name); 344 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 345 if (IS_ERR(req)) 346 return PTR_ERR(req); 347 err = ceph_alloc_readdir_reply_buffer(req, inode); 348 if (err) { 349 ceph_mdsc_put_request(req); 350 return err; 351 } 352 /* hints to request -> mds selection code */ 353 req->r_direct_mode = USE_AUTH_MDS; 354 req->r_direct_hash = ceph_frag_value(frag); 355 req->r_direct_is_hash = true; 356 if (fi->last_name) { 357 req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL); 358 if (!req->r_path2) { 359 ceph_mdsc_put_request(req); 360 return -ENOMEM; 361 } 362 } 363 req->r_dir_release_cnt = fi->dir_release_count; 364 req->r_dir_ordered_cnt = fi->dir_ordered_count; 365 req->r_readdir_cache_idx = fi->readdir_cache_idx; 366 req->r_readdir_offset = fi->next_offset; 367 req->r_args.readdir.frag = cpu_to_le32(frag); 368 369 req->r_inode = inode; 370 ihold(inode); 371 req->r_dentry = dget(file->f_path.dentry); 372 err = ceph_mdsc_do_request(mdsc, NULL, req); 373 if (err < 0) { 374 ceph_mdsc_put_request(req); 375 return err; 376 } 377 dout("readdir got and parsed readdir result=%d" 378 " on frag %x, end=%d, complete=%d\n", err, frag, 379 (int)req->r_reply_info.dir_end, 380 (int)req->r_reply_info.dir_complete); 381 382 383 /* note next offset and last dentry name */ 384 rinfo = &req->r_reply_info; 385 if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { 386 frag = le32_to_cpu(rinfo->dir_dir->frag); 387 off = req->r_readdir_offset; 388 fi->next_offset = off; 389 } 390 391 fi->frag = frag; 392 fi->offset = fi->next_offset; 393 fi->last_readdir = req; 394 395 if (req->r_did_prepopulate) { 396 fi->readdir_cache_idx = req->r_readdir_cache_idx; 397 if (fi->readdir_cache_idx < 0) { 398 /* preclude from marking dir ordered */ 399 fi->dir_ordered_count = 0; 400 } else if (ceph_frag_is_leftmost(frag) && off == 2) { 401 /* note dir version at start of readdir so 402 * we can tell if any dentries get dropped */ 403 fi->dir_release_count = req->r_dir_release_cnt; 404 fi->dir_ordered_count = req->r_dir_ordered_cnt; 405 } 406 } else { 407 dout("readdir !did_prepopulate"); 408 /* disable readdir cache */ 409 fi->readdir_cache_idx = -1; 410 /* preclude from marking dir complete */ 411 fi->dir_release_count = 0; 412 } 413 414 if (req->r_reply_info.dir_end) { 415 kfree(fi->last_name); 416 fi->last_name = NULL; 417 fi->next_offset = 2; 418 } else { 419 struct ceph_mds_reply_dir_entry *rde = 420 rinfo->dir_entries + (rinfo->dir_nr-1); 421 err = note_last_dentry(fi, rde->name, rde->name_len, 422 fi->next_offset + rinfo->dir_nr); 423 if (err) 424 return err; 425 } 426 } 427 428 rinfo = &fi->last_readdir->r_reply_info; 429 dout("readdir frag %x num %d off %d chunkoff %d\n", frag, 430 rinfo->dir_nr, off, fi->offset); 431 432 ctx->pos = ceph_make_fpos(frag, off); 433 while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) { 434 struct ceph_mds_reply_dir_entry *rde = 435 rinfo->dir_entries + (off - fi->offset); 436 struct ceph_vino vino; 437 ino_t ino; 438 439 dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", 440 off, off - fi->offset, rinfo->dir_nr, ctx->pos, 441 rde->name_len, rde->name, &rde->inode.in); 442 BUG_ON(!rde->inode.in); 443 ftype = le32_to_cpu(rde->inode.in->mode) >> 12; 444 vino.ino = le64_to_cpu(rde->inode.in->ino); 445 vino.snap = le64_to_cpu(rde->inode.in->snapid); 446 ino = ceph_vino_to_ino(vino); 447 if (!dir_emit(ctx, rde->name, rde->name_len, 448 ceph_translate_ino(inode->i_sb, ino), ftype)) { 449 dout("filldir stopping us...\n"); 450 return 0; 451 } 452 off++; 453 ctx->pos++; 454 } 455 456 if (fi->last_name) { 457 ceph_mdsc_put_request(fi->last_readdir); 458 fi->last_readdir = NULL; 459 goto more; 460 } 461 462 /* more frags? */ 463 if (!ceph_frag_is_rightmost(frag)) { 464 frag = ceph_frag_next(frag); 465 off = 2; 466 ctx->pos = ceph_make_fpos(frag, off); 467 dout("readdir next frag is %x\n", frag); 468 goto more; 469 } 470 fi->flags |= CEPH_F_ATEND; 471 472 /* 473 * if dir_release_count still matches the dir, no dentries 474 * were released during the whole readdir, and we should have 475 * the complete dir contents in our cache. 476 */ 477 if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) { 478 spin_lock(&ci->i_ceph_lock); 479 if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) { 480 dout(" marking %p complete and ordered\n", inode); 481 /* use i_size to track number of entries in 482 * readdir cache */ 483 BUG_ON(fi->readdir_cache_idx < 0); 484 i_size_write(inode, fi->readdir_cache_idx * 485 sizeof(struct dentry*)); 486 } else { 487 dout(" marking %p complete\n", inode); 488 } 489 __ceph_dir_set_complete(ci, fi->dir_release_count, 490 fi->dir_ordered_count); 491 spin_unlock(&ci->i_ceph_lock); 492 } 493 494 dout("readdir %p file %p done.\n", inode, file); 495 return 0; 496 } 497 498 static void reset_readdir(struct ceph_file_info *fi, unsigned frag) 499 { 500 if (fi->last_readdir) { 501 ceph_mdsc_put_request(fi->last_readdir); 502 fi->last_readdir = NULL; 503 } 504 kfree(fi->last_name); 505 fi->last_name = NULL; 506 fi->dir_release_count = 0; 507 fi->readdir_cache_idx = -1; 508 fi->next_offset = 2; /* compensate for . and .. */ 509 fi->flags &= ~CEPH_F_ATEND; 510 } 511 512 static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) 513 { 514 struct ceph_file_info *fi = file->private_data; 515 struct inode *inode = file->f_mapping->host; 516 loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset); 517 loff_t retval; 518 519 inode_lock(inode); 520 retval = -EINVAL; 521 switch (whence) { 522 case SEEK_CUR: 523 offset += file->f_pos; 524 case SEEK_SET: 525 break; 526 case SEEK_END: 527 retval = -EOPNOTSUPP; 528 default: 529 goto out; 530 } 531 532 if (offset >= 0) { 533 if (offset != file->f_pos) { 534 file->f_pos = offset; 535 file->f_version = 0; 536 fi->flags &= ~CEPH_F_ATEND; 537 } 538 retval = offset; 539 540 if (offset == 0 || 541 fpos_frag(offset) != fi->frag || 542 fpos_off(offset) < fi->offset) { 543 /* discard buffered readdir content on seekdir(0), or 544 * seek to new frag, or seek prior to current chunk */ 545 dout("dir_llseek dropping %p content\n", file); 546 reset_readdir(fi, fpos_frag(offset)); 547 } else if (fpos_cmp(offset, old_offset) > 0) { 548 /* reset dir_release_count if we did a forward seek */ 549 fi->dir_release_count = 0; 550 fi->readdir_cache_idx = -1; 551 } 552 } 553 out: 554 inode_unlock(inode); 555 return retval; 556 } 557 558 /* 559 * Handle lookups for the hidden .snap directory. 560 */ 561 int ceph_handle_snapdir(struct ceph_mds_request *req, 562 struct dentry *dentry, int err) 563 { 564 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 565 struct inode *parent = d_inode(dentry->d_parent); /* we hold i_mutex */ 566 567 /* .snap dir? */ 568 if (err == -ENOENT && 569 ceph_snap(parent) == CEPH_NOSNAP && 570 strcmp(dentry->d_name.name, 571 fsc->mount_options->snapdir_name) == 0) { 572 struct inode *inode = ceph_get_snapdir(parent); 573 dout("ENOENT on snapdir %p '%pd', linking to snapdir %p\n", 574 dentry, dentry, inode); 575 BUG_ON(!d_unhashed(dentry)); 576 d_add(dentry, inode); 577 err = 0; 578 } 579 return err; 580 } 581 582 /* 583 * Figure out final result of a lookup/open request. 584 * 585 * Mainly, make sure we return the final req->r_dentry (if it already 586 * existed) in place of the original VFS-provided dentry when they 587 * differ. 588 * 589 * Gracefully handle the case where the MDS replies with -ENOENT and 590 * no trace (which it may do, at its discretion, e.g., if it doesn't 591 * care to issue a lease on the negative dentry). 592 */ 593 struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, 594 struct dentry *dentry, int err) 595 { 596 if (err == -ENOENT) { 597 /* no trace? */ 598 err = 0; 599 if (!req->r_reply_info.head->is_dentry) { 600 dout("ENOENT and no trace, dentry %p inode %p\n", 601 dentry, d_inode(dentry)); 602 if (d_really_is_positive(dentry)) { 603 d_drop(dentry); 604 err = -ENOENT; 605 } else { 606 d_add(dentry, NULL); 607 } 608 } 609 } 610 if (err) 611 dentry = ERR_PTR(err); 612 else if (dentry != req->r_dentry) 613 dentry = dget(req->r_dentry); /* we got spliced */ 614 else 615 dentry = NULL; 616 return dentry; 617 } 618 619 static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) 620 { 621 return ceph_ino(inode) == CEPH_INO_ROOT && 622 strncmp(dentry->d_name.name, ".ceph", 5) == 0; 623 } 624 625 /* 626 * Look up a single dir entry. If there is a lookup intent, inform 627 * the MDS so that it gets our 'caps wanted' value in a single op. 628 */ 629 static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, 630 unsigned int flags) 631 { 632 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 633 struct ceph_mds_client *mdsc = fsc->mdsc; 634 struct ceph_mds_request *req; 635 int op; 636 int mask; 637 int err; 638 639 dout("lookup %p dentry %p '%pd'\n", 640 dir, dentry, dentry); 641 642 if (dentry->d_name.len > NAME_MAX) 643 return ERR_PTR(-ENAMETOOLONG); 644 645 err = ceph_init_dentry(dentry); 646 if (err < 0) 647 return ERR_PTR(err); 648 649 /* can we conclude ENOENT locally? */ 650 if (d_really_is_negative(dentry)) { 651 struct ceph_inode_info *ci = ceph_inode(dir); 652 struct ceph_dentry_info *di = ceph_dentry(dentry); 653 654 spin_lock(&ci->i_ceph_lock); 655 dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); 656 if (strncmp(dentry->d_name.name, 657 fsc->mount_options->snapdir_name, 658 dentry->d_name.len) && 659 !is_root_ceph_dentry(dir, dentry) && 660 ceph_test_mount_opt(fsc, DCACHE) && 661 __ceph_dir_is_complete(ci) && 662 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { 663 spin_unlock(&ci->i_ceph_lock); 664 dout(" dir %p complete, -ENOENT\n", dir); 665 d_add(dentry, NULL); 666 di->lease_shared_gen = ci->i_shared_gen; 667 return NULL; 668 } 669 spin_unlock(&ci->i_ceph_lock); 670 } 671 672 op = ceph_snap(dir) == CEPH_SNAPDIR ? 673 CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; 674 req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); 675 if (IS_ERR(req)) 676 return ERR_CAST(req); 677 req->r_dentry = dget(dentry); 678 req->r_num_caps = 2; 679 680 mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; 681 if (ceph_security_xattr_wanted(dir)) 682 mask |= CEPH_CAP_XATTR_SHARED; 683 req->r_args.getattr.mask = cpu_to_le32(mask); 684 685 req->r_locked_dir = dir; 686 err = ceph_mdsc_do_request(mdsc, NULL, req); 687 err = ceph_handle_snapdir(req, dentry, err); 688 dentry = ceph_finish_lookup(req, dentry, err); 689 ceph_mdsc_put_request(req); /* will dput(dentry) */ 690 dout("lookup result=%p\n", dentry); 691 return dentry; 692 } 693 694 /* 695 * If we do a create but get no trace back from the MDS, follow up with 696 * a lookup (the VFS expects us to link up the provided dentry). 697 */ 698 int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) 699 { 700 struct dentry *result = ceph_lookup(dir, dentry, 0); 701 702 if (result && !IS_ERR(result)) { 703 /* 704 * We created the item, then did a lookup, and found 705 * it was already linked to another inode we already 706 * had in our cache (and thus got spliced). To not 707 * confuse VFS (especially when inode is a directory), 708 * we don't link our dentry to that inode, return an 709 * error instead. 710 * 711 * This event should be rare and it happens only when 712 * we talk to old MDS. Recent MDS does not send traceless 713 * reply for request that creates new inode. 714 */ 715 d_drop(result); 716 return -ESTALE; 717 } 718 return PTR_ERR(result); 719 } 720 721 static int ceph_mknod(struct inode *dir, struct dentry *dentry, 722 umode_t mode, dev_t rdev) 723 { 724 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 725 struct ceph_mds_client *mdsc = fsc->mdsc; 726 struct ceph_mds_request *req; 727 struct ceph_acls_info acls = {}; 728 int err; 729 730 if (ceph_snap(dir) != CEPH_NOSNAP) 731 return -EROFS; 732 733 err = ceph_pre_init_acls(dir, &mode, &acls); 734 if (err < 0) 735 return err; 736 737 dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n", 738 dir, dentry, mode, rdev); 739 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS); 740 if (IS_ERR(req)) { 741 err = PTR_ERR(req); 742 goto out; 743 } 744 req->r_dentry = dget(dentry); 745 req->r_num_caps = 2; 746 req->r_locked_dir = dir; 747 req->r_args.mknod.mode = cpu_to_le32(mode); 748 req->r_args.mknod.rdev = cpu_to_le32(rdev); 749 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 750 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 751 if (acls.pagelist) { 752 req->r_pagelist = acls.pagelist; 753 acls.pagelist = NULL; 754 } 755 err = ceph_mdsc_do_request(mdsc, dir, req); 756 if (!err && !req->r_reply_info.head->is_dentry) 757 err = ceph_handle_notrace_create(dir, dentry); 758 ceph_mdsc_put_request(req); 759 out: 760 if (!err) 761 ceph_init_inode_acls(d_inode(dentry), &acls); 762 else 763 d_drop(dentry); 764 ceph_release_acls_info(&acls); 765 return err; 766 } 767 768 static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode, 769 bool excl) 770 { 771 return ceph_mknod(dir, dentry, mode, 0); 772 } 773 774 static int ceph_symlink(struct inode *dir, struct dentry *dentry, 775 const char *dest) 776 { 777 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 778 struct ceph_mds_client *mdsc = fsc->mdsc; 779 struct ceph_mds_request *req; 780 int err; 781 782 if (ceph_snap(dir) != CEPH_NOSNAP) 783 return -EROFS; 784 785 dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); 786 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); 787 if (IS_ERR(req)) { 788 err = PTR_ERR(req); 789 goto out; 790 } 791 req->r_path2 = kstrdup(dest, GFP_KERNEL); 792 if (!req->r_path2) { 793 err = -ENOMEM; 794 ceph_mdsc_put_request(req); 795 goto out; 796 } 797 req->r_locked_dir = dir; 798 req->r_dentry = dget(dentry); 799 req->r_num_caps = 2; 800 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 801 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 802 err = ceph_mdsc_do_request(mdsc, dir, req); 803 if (!err && !req->r_reply_info.head->is_dentry) 804 err = ceph_handle_notrace_create(dir, dentry); 805 ceph_mdsc_put_request(req); 806 out: 807 if (err) 808 d_drop(dentry); 809 return err; 810 } 811 812 static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 813 { 814 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 815 struct ceph_mds_client *mdsc = fsc->mdsc; 816 struct ceph_mds_request *req; 817 struct ceph_acls_info acls = {}; 818 int err = -EROFS; 819 int op; 820 821 if (ceph_snap(dir) == CEPH_SNAPDIR) { 822 /* mkdir .snap/foo is a MKSNAP */ 823 op = CEPH_MDS_OP_MKSNAP; 824 dout("mksnap dir %p snap '%pd' dn %p\n", dir, 825 dentry, dentry); 826 } else if (ceph_snap(dir) == CEPH_NOSNAP) { 827 dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode); 828 op = CEPH_MDS_OP_MKDIR; 829 } else { 830 goto out; 831 } 832 833 mode |= S_IFDIR; 834 err = ceph_pre_init_acls(dir, &mode, &acls); 835 if (err < 0) 836 goto out; 837 838 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 839 if (IS_ERR(req)) { 840 err = PTR_ERR(req); 841 goto out; 842 } 843 844 req->r_dentry = dget(dentry); 845 req->r_num_caps = 2; 846 req->r_locked_dir = dir; 847 req->r_args.mkdir.mode = cpu_to_le32(mode); 848 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 849 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 850 if (acls.pagelist) { 851 req->r_pagelist = acls.pagelist; 852 acls.pagelist = NULL; 853 } 854 err = ceph_mdsc_do_request(mdsc, dir, req); 855 if (!err && 856 !req->r_reply_info.head->is_target && 857 !req->r_reply_info.head->is_dentry) 858 err = ceph_handle_notrace_create(dir, dentry); 859 ceph_mdsc_put_request(req); 860 out: 861 if (!err) 862 ceph_init_inode_acls(d_inode(dentry), &acls); 863 else 864 d_drop(dentry); 865 ceph_release_acls_info(&acls); 866 return err; 867 } 868 869 static int ceph_link(struct dentry *old_dentry, struct inode *dir, 870 struct dentry *dentry) 871 { 872 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 873 struct ceph_mds_client *mdsc = fsc->mdsc; 874 struct ceph_mds_request *req; 875 int err; 876 877 if (ceph_snap(dir) != CEPH_NOSNAP) 878 return -EROFS; 879 880 dout("link in dir %p old_dentry %p dentry %p\n", dir, 881 old_dentry, dentry); 882 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS); 883 if (IS_ERR(req)) { 884 d_drop(dentry); 885 return PTR_ERR(req); 886 } 887 req->r_dentry = dget(dentry); 888 req->r_num_caps = 2; 889 req->r_old_dentry = dget(old_dentry); 890 req->r_locked_dir = dir; 891 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 892 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 893 /* release LINK_SHARED on source inode (mds will lock it) */ 894 req->r_old_inode_drop = CEPH_CAP_LINK_SHARED; 895 err = ceph_mdsc_do_request(mdsc, dir, req); 896 if (err) { 897 d_drop(dentry); 898 } else if (!req->r_reply_info.head->is_dentry) { 899 ihold(d_inode(old_dentry)); 900 d_instantiate(dentry, d_inode(old_dentry)); 901 } 902 ceph_mdsc_put_request(req); 903 return err; 904 } 905 906 /* 907 * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it 908 * looks like the link count will hit 0, drop any other caps (other 909 * than PIN) we don't specifically want (due to the file still being 910 * open). 911 */ 912 static int drop_caps_for_unlink(struct inode *inode) 913 { 914 struct ceph_inode_info *ci = ceph_inode(inode); 915 int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; 916 917 spin_lock(&ci->i_ceph_lock); 918 if (inode->i_nlink == 1) { 919 drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN); 920 ci->i_ceph_flags |= CEPH_I_NODELAY; 921 } 922 spin_unlock(&ci->i_ceph_lock); 923 return drop; 924 } 925 926 /* 927 * rmdir and unlink are differ only by the metadata op code 928 */ 929 static int ceph_unlink(struct inode *dir, struct dentry *dentry) 930 { 931 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 932 struct ceph_mds_client *mdsc = fsc->mdsc; 933 struct inode *inode = d_inode(dentry); 934 struct ceph_mds_request *req; 935 int err = -EROFS; 936 int op; 937 938 if (ceph_snap(dir) == CEPH_SNAPDIR) { 939 /* rmdir .snap/foo is RMSNAP */ 940 dout("rmsnap dir %p '%pd' dn %p\n", dir, dentry, dentry); 941 op = CEPH_MDS_OP_RMSNAP; 942 } else if (ceph_snap(dir) == CEPH_NOSNAP) { 943 dout("unlink/rmdir dir %p dn %p inode %p\n", 944 dir, dentry, inode); 945 op = d_is_dir(dentry) ? 946 CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; 947 } else 948 goto out; 949 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 950 if (IS_ERR(req)) { 951 err = PTR_ERR(req); 952 goto out; 953 } 954 req->r_dentry = dget(dentry); 955 req->r_num_caps = 2; 956 req->r_locked_dir = dir; 957 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 958 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 959 req->r_inode_drop = drop_caps_for_unlink(inode); 960 err = ceph_mdsc_do_request(mdsc, dir, req); 961 if (!err && !req->r_reply_info.head->is_dentry) 962 d_delete(dentry); 963 ceph_mdsc_put_request(req); 964 out: 965 return err; 966 } 967 968 static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, 969 struct inode *new_dir, struct dentry *new_dentry) 970 { 971 struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); 972 struct ceph_mds_client *mdsc = fsc->mdsc; 973 struct ceph_mds_request *req; 974 int op = CEPH_MDS_OP_RENAME; 975 int err; 976 977 if (ceph_snap(old_dir) != ceph_snap(new_dir)) 978 return -EXDEV; 979 if (ceph_snap(old_dir) != CEPH_NOSNAP) { 980 if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR) 981 op = CEPH_MDS_OP_RENAMESNAP; 982 else 983 return -EROFS; 984 } 985 dout("rename dir %p dentry %p to dir %p dentry %p\n", 986 old_dir, old_dentry, new_dir, new_dentry); 987 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 988 if (IS_ERR(req)) 989 return PTR_ERR(req); 990 ihold(old_dir); 991 req->r_dentry = dget(new_dentry); 992 req->r_num_caps = 2; 993 req->r_old_dentry = dget(old_dentry); 994 req->r_old_dentry_dir = old_dir; 995 req->r_locked_dir = new_dir; 996 req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; 997 req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; 998 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 999 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 1000 /* release LINK_RDCACHE on source inode (mds will lock it) */ 1001 req->r_old_inode_drop = CEPH_CAP_LINK_SHARED; 1002 if (d_really_is_positive(new_dentry)) 1003 req->r_inode_drop = drop_caps_for_unlink(d_inode(new_dentry)); 1004 err = ceph_mdsc_do_request(mdsc, old_dir, req); 1005 if (!err && !req->r_reply_info.head->is_dentry) { 1006 /* 1007 * Normally d_move() is done by fill_trace (called by 1008 * do_request, above). If there is no trace, we need 1009 * to do it here. 1010 */ 1011 1012 /* d_move screws up sibling dentries' offsets */ 1013 ceph_dir_clear_complete(old_dir); 1014 ceph_dir_clear_complete(new_dir); 1015 1016 d_move(old_dentry, new_dentry); 1017 1018 /* ensure target dentry is invalidated, despite 1019 rehashing bug in vfs_rename_dir */ 1020 ceph_invalidate_dentry_lease(new_dentry); 1021 } 1022 ceph_mdsc_put_request(req); 1023 return err; 1024 } 1025 1026 /* 1027 * Ensure a dentry lease will no longer revalidate. 1028 */ 1029 void ceph_invalidate_dentry_lease(struct dentry *dentry) 1030 { 1031 spin_lock(&dentry->d_lock); 1032 dentry->d_time = jiffies; 1033 ceph_dentry(dentry)->lease_shared_gen = 0; 1034 spin_unlock(&dentry->d_lock); 1035 } 1036 1037 /* 1038 * Check if dentry lease is valid. If not, delete the lease. Try to 1039 * renew if the least is more than half up. 1040 */ 1041 static int dentry_lease_is_valid(struct dentry *dentry) 1042 { 1043 struct ceph_dentry_info *di; 1044 struct ceph_mds_session *s; 1045 int valid = 0; 1046 u32 gen; 1047 unsigned long ttl; 1048 struct ceph_mds_session *session = NULL; 1049 struct inode *dir = NULL; 1050 u32 seq = 0; 1051 1052 spin_lock(&dentry->d_lock); 1053 di = ceph_dentry(dentry); 1054 if (di->lease_session) { 1055 s = di->lease_session; 1056 spin_lock(&s->s_gen_ttl_lock); 1057 gen = s->s_cap_gen; 1058 ttl = s->s_cap_ttl; 1059 spin_unlock(&s->s_gen_ttl_lock); 1060 1061 if (di->lease_gen == gen && 1062 time_before(jiffies, dentry->d_time) && 1063 time_before(jiffies, ttl)) { 1064 valid = 1; 1065 if (di->lease_renew_after && 1066 time_after(jiffies, di->lease_renew_after)) { 1067 /* we should renew */ 1068 dir = d_inode(dentry->d_parent); 1069 session = ceph_get_mds_session(s); 1070 seq = di->lease_seq; 1071 di->lease_renew_after = 0; 1072 di->lease_renew_from = jiffies; 1073 } 1074 } 1075 } 1076 spin_unlock(&dentry->d_lock); 1077 1078 if (session) { 1079 ceph_mdsc_lease_send_msg(session, dir, dentry, 1080 CEPH_MDS_LEASE_RENEW, seq); 1081 ceph_put_mds_session(session); 1082 } 1083 dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid); 1084 return valid; 1085 } 1086 1087 /* 1088 * Check if directory-wide content lease/cap is valid. 1089 */ 1090 static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) 1091 { 1092 struct ceph_inode_info *ci = ceph_inode(dir); 1093 struct ceph_dentry_info *di = ceph_dentry(dentry); 1094 int valid = 0; 1095 1096 spin_lock(&ci->i_ceph_lock); 1097 if (ci->i_shared_gen == di->lease_shared_gen) 1098 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); 1099 spin_unlock(&ci->i_ceph_lock); 1100 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", 1101 dir, (unsigned)ci->i_shared_gen, dentry, 1102 (unsigned)di->lease_shared_gen, valid); 1103 return valid; 1104 } 1105 1106 /* 1107 * Check if cached dentry can be trusted. 1108 */ 1109 static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) 1110 { 1111 int valid = 0; 1112 struct dentry *parent; 1113 struct inode *dir; 1114 1115 if (flags & LOOKUP_RCU) 1116 return -ECHILD; 1117 1118 dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry, 1119 dentry, d_inode(dentry), ceph_dentry(dentry)->offset); 1120 1121 parent = dget_parent(dentry); 1122 dir = d_inode(parent); 1123 1124 /* always trust cached snapped dentries, snapdir dentry */ 1125 if (ceph_snap(dir) != CEPH_NOSNAP) { 1126 dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry, 1127 dentry, d_inode(dentry)); 1128 valid = 1; 1129 } else if (d_really_is_positive(dentry) && 1130 ceph_snap(d_inode(dentry)) == CEPH_SNAPDIR) { 1131 valid = 1; 1132 } else if (dentry_lease_is_valid(dentry) || 1133 dir_lease_is_valid(dir, dentry)) { 1134 if (d_really_is_positive(dentry)) 1135 valid = ceph_is_any_caps(d_inode(dentry)); 1136 else 1137 valid = 1; 1138 } 1139 1140 if (!valid) { 1141 struct ceph_mds_client *mdsc = 1142 ceph_sb_to_client(dir->i_sb)->mdsc; 1143 struct ceph_mds_request *req; 1144 int op, mask, err; 1145 1146 op = ceph_snap(dir) == CEPH_SNAPDIR ? 1147 CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; 1148 req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); 1149 if (!IS_ERR(req)) { 1150 req->r_dentry = dget(dentry); 1151 req->r_num_caps = 2; 1152 1153 mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; 1154 if (ceph_security_xattr_wanted(dir)) 1155 mask |= CEPH_CAP_XATTR_SHARED; 1156 req->r_args.getattr.mask = mask; 1157 1158 req->r_locked_dir = dir; 1159 err = ceph_mdsc_do_request(mdsc, NULL, req); 1160 if (err == 0 || err == -ENOENT) { 1161 if (dentry == req->r_dentry) { 1162 valid = !d_unhashed(dentry); 1163 } else { 1164 d_invalidate(req->r_dentry); 1165 err = -EAGAIN; 1166 } 1167 } 1168 ceph_mdsc_put_request(req); 1169 dout("d_revalidate %p lookup result=%d\n", 1170 dentry, err); 1171 } 1172 } 1173 1174 dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid"); 1175 if (valid) { 1176 ceph_dentry_lru_touch(dentry); 1177 } else { 1178 ceph_dir_clear_complete(dir); 1179 } 1180 1181 dput(parent); 1182 return valid; 1183 } 1184 1185 /* 1186 * Release our ceph_dentry_info. 1187 */ 1188 static void ceph_d_release(struct dentry *dentry) 1189 { 1190 struct ceph_dentry_info *di = ceph_dentry(dentry); 1191 1192 dout("d_release %p\n", dentry); 1193 ceph_dentry_lru_del(dentry); 1194 if (di->lease_session) 1195 ceph_put_mds_session(di->lease_session); 1196 kmem_cache_free(ceph_dentry_cachep, di); 1197 dentry->d_fsdata = NULL; 1198 } 1199 1200 static int ceph_snapdir_d_revalidate(struct dentry *dentry, 1201 unsigned int flags) 1202 { 1203 /* 1204 * Eventually, we'll want to revalidate snapped metadata 1205 * too... probably... 1206 */ 1207 return 1; 1208 } 1209 1210 /* 1211 * When the VFS prunes a dentry from the cache, we need to clear the 1212 * complete flag on the parent directory. 1213 * 1214 * Called under dentry->d_lock. 1215 */ 1216 static void ceph_d_prune(struct dentry *dentry) 1217 { 1218 dout("ceph_d_prune %p\n", dentry); 1219 1220 /* do we have a valid parent? */ 1221 if (IS_ROOT(dentry)) 1222 return; 1223 1224 /* if we are not hashed, we don't affect dir's completeness */ 1225 if (d_unhashed(dentry)) 1226 return; 1227 1228 /* 1229 * we hold d_lock, so d_parent is stable, and d_fsdata is never 1230 * cleared until d_release 1231 */ 1232 ceph_dir_clear_complete(d_inode(dentry->d_parent)); 1233 } 1234 1235 /* 1236 * read() on a dir. This weird interface hack only works if mounted 1237 * with '-o dirstat'. 1238 */ 1239 static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, 1240 loff_t *ppos) 1241 { 1242 struct ceph_file_info *cf = file->private_data; 1243 struct inode *inode = file_inode(file); 1244 struct ceph_inode_info *ci = ceph_inode(inode); 1245 int left; 1246 const int bufsize = 1024; 1247 1248 if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) 1249 return -EISDIR; 1250 1251 if (!cf->dir_info) { 1252 cf->dir_info = kmalloc(bufsize, GFP_KERNEL); 1253 if (!cf->dir_info) 1254 return -ENOMEM; 1255 cf->dir_info_len = 1256 snprintf(cf->dir_info, bufsize, 1257 "entries: %20lld\n" 1258 " files: %20lld\n" 1259 " subdirs: %20lld\n" 1260 "rentries: %20lld\n" 1261 " rfiles: %20lld\n" 1262 " rsubdirs: %20lld\n" 1263 "rbytes: %20lld\n" 1264 "rctime: %10ld.%09ld\n", 1265 ci->i_files + ci->i_subdirs, 1266 ci->i_files, 1267 ci->i_subdirs, 1268 ci->i_rfiles + ci->i_rsubdirs, 1269 ci->i_rfiles, 1270 ci->i_rsubdirs, 1271 ci->i_rbytes, 1272 (long)ci->i_rctime.tv_sec, 1273 (long)ci->i_rctime.tv_nsec); 1274 } 1275 1276 if (*ppos >= cf->dir_info_len) 1277 return 0; 1278 size = min_t(unsigned, size, cf->dir_info_len-*ppos); 1279 left = copy_to_user(buf, cf->dir_info + *ppos, size); 1280 if (left == size) 1281 return -EFAULT; 1282 *ppos += (size - left); 1283 return size - left; 1284 } 1285 1286 /* 1287 * We maintain a private dentry LRU. 1288 * 1289 * FIXME: this needs to be changed to a per-mds lru to be useful. 1290 */ 1291 void ceph_dentry_lru_add(struct dentry *dn) 1292 { 1293 struct ceph_dentry_info *di = ceph_dentry(dn); 1294 struct ceph_mds_client *mdsc; 1295 1296 dout("dentry_lru_add %p %p '%pd'\n", di, dn, dn); 1297 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1298 spin_lock(&mdsc->dentry_lru_lock); 1299 list_add_tail(&di->lru, &mdsc->dentry_lru); 1300 mdsc->num_dentry++; 1301 spin_unlock(&mdsc->dentry_lru_lock); 1302 } 1303 1304 void ceph_dentry_lru_touch(struct dentry *dn) 1305 { 1306 struct ceph_dentry_info *di = ceph_dentry(dn); 1307 struct ceph_mds_client *mdsc; 1308 1309 dout("dentry_lru_touch %p %p '%pd' (offset %lld)\n", di, dn, dn, 1310 di->offset); 1311 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1312 spin_lock(&mdsc->dentry_lru_lock); 1313 list_move_tail(&di->lru, &mdsc->dentry_lru); 1314 spin_unlock(&mdsc->dentry_lru_lock); 1315 } 1316 1317 void ceph_dentry_lru_del(struct dentry *dn) 1318 { 1319 struct ceph_dentry_info *di = ceph_dentry(dn); 1320 struct ceph_mds_client *mdsc; 1321 1322 dout("dentry_lru_del %p %p '%pd'\n", di, dn, dn); 1323 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1324 spin_lock(&mdsc->dentry_lru_lock); 1325 list_del_init(&di->lru); 1326 mdsc->num_dentry--; 1327 spin_unlock(&mdsc->dentry_lru_lock); 1328 } 1329 1330 /* 1331 * Return name hash for a given dentry. This is dependent on 1332 * the parent directory's hash function. 1333 */ 1334 unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) 1335 { 1336 struct ceph_inode_info *dci = ceph_inode(dir); 1337 1338 switch (dci->i_dir_layout.dl_dir_hash) { 1339 case 0: /* for backward compat */ 1340 case CEPH_STR_HASH_LINUX: 1341 return dn->d_name.hash; 1342 1343 default: 1344 return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, 1345 dn->d_name.name, dn->d_name.len); 1346 } 1347 } 1348 1349 const struct file_operations ceph_dir_fops = { 1350 .read = ceph_read_dir, 1351 .iterate = ceph_readdir, 1352 .llseek = ceph_dir_llseek, 1353 .open = ceph_open, 1354 .release = ceph_release, 1355 .unlocked_ioctl = ceph_ioctl, 1356 .fsync = ceph_fsync, 1357 }; 1358 1359 const struct file_operations ceph_snapdir_fops = { 1360 .iterate = ceph_readdir, 1361 .llseek = ceph_dir_llseek, 1362 .open = ceph_open, 1363 .release = ceph_release, 1364 }; 1365 1366 const struct inode_operations ceph_dir_iops = { 1367 .lookup = ceph_lookup, 1368 .permission = ceph_permission, 1369 .getattr = ceph_getattr, 1370 .setattr = ceph_setattr, 1371 .setxattr = ceph_setxattr, 1372 .getxattr = ceph_getxattr, 1373 .listxattr = ceph_listxattr, 1374 .removexattr = ceph_removexattr, 1375 .get_acl = ceph_get_acl, 1376 .set_acl = ceph_set_acl, 1377 .mknod = ceph_mknod, 1378 .symlink = ceph_symlink, 1379 .mkdir = ceph_mkdir, 1380 .link = ceph_link, 1381 .unlink = ceph_unlink, 1382 .rmdir = ceph_unlink, 1383 .rename = ceph_rename, 1384 .create = ceph_create, 1385 .atomic_open = ceph_atomic_open, 1386 }; 1387 1388 const struct inode_operations ceph_snapdir_iops = { 1389 .lookup = ceph_lookup, 1390 .permission = ceph_permission, 1391 .getattr = ceph_getattr, 1392 .mkdir = ceph_mkdir, 1393 .rmdir = ceph_unlink, 1394 .rename = ceph_rename, 1395 }; 1396 1397 const struct dentry_operations ceph_dentry_ops = { 1398 .d_revalidate = ceph_d_revalidate, 1399 .d_release = ceph_d_release, 1400 .d_prune = ceph_d_prune, 1401 }; 1402 1403 const struct dentry_operations ceph_snapdir_dentry_ops = { 1404 .d_revalidate = ceph_snapdir_d_revalidate, 1405 .d_release = ceph_d_release, 1406 }; 1407 1408 const struct dentry_operations ceph_snap_dentry_ops = { 1409 .d_release = ceph_d_release, 1410 .d_prune = ceph_d_prune, 1411 }; 1412