1 #include <linux/ceph/ceph_debug.h> 2 3 #include <linux/spinlock.h> 4 #include <linux/fs_struct.h> 5 #include <linux/namei.h> 6 #include <linux/slab.h> 7 #include <linux/sched.h> 8 9 #include "super.h" 10 #include "mds_client.h" 11 12 /* 13 * Directory operations: readdir, lookup, create, link, unlink, 14 * rename, etc. 15 */ 16 17 /* 18 * Ceph MDS operations are specified in terms of a base ino and 19 * relative path. Thus, the client can specify an operation on a 20 * specific inode (e.g., a getattr due to fstat(2)), or as a path 21 * relative to, say, the root directory. 22 * 23 * Normally, we limit ourselves to strict inode ops (no path component) 24 * or dentry operations (a single path component relative to an ino). The 25 * exception to this is open_root_dentry(), which will open the mount 26 * point by name. 27 */ 28 29 const struct inode_operations ceph_dir_iops; 30 const struct file_operations ceph_dir_fops; 31 const struct dentry_operations ceph_dentry_ops; 32 33 /* 34 * Initialize ceph dentry state. 
35 */ 36 int ceph_init_dentry(struct dentry *dentry) 37 { 38 struct ceph_dentry_info *di; 39 40 if (dentry->d_fsdata) 41 return 0; 42 43 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); 44 if (!di) 45 return -ENOMEM; /* oh well */ 46 47 spin_lock(&dentry->d_lock); 48 if (dentry->d_fsdata) { 49 /* lost a race */ 50 kmem_cache_free(ceph_dentry_cachep, di); 51 goto out_unlock; 52 } 53 54 if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ 55 ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) 56 d_set_d_op(dentry, &ceph_dentry_ops); 57 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) 58 d_set_d_op(dentry, &ceph_snapdir_dentry_ops); 59 else 60 d_set_d_op(dentry, &ceph_snap_dentry_ops); 61 62 di->dentry = dentry; 63 di->lease_session = NULL; 64 dentry->d_time = jiffies; 65 /* avoid reordering d_fsdata setup so that the check above is safe */ 66 smp_mb(); 67 dentry->d_fsdata = di; 68 ceph_dentry_lru_add(dentry); 69 out_unlock: 70 spin_unlock(&dentry->d_lock); 71 return 0; 72 } 73 74 struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry) 75 { 76 struct inode *inode = NULL; 77 78 if (!dentry) 79 return NULL; 80 81 spin_lock(&dentry->d_lock); 82 if (dentry->d_parent) { 83 inode = dentry->d_parent->d_inode; 84 ihold(inode); 85 } 86 spin_unlock(&dentry->d_lock); 87 return inode; 88 } 89 90 91 /* 92 * for readdir, we encode the directory frag and offset within that 93 * frag into f_pos. 94 */ 95 static unsigned fpos_frag(loff_t p) 96 { 97 return p >> 32; 98 } 99 static unsigned fpos_off(loff_t p) 100 { 101 return p & 0xffffffff; 102 } 103 104 /* 105 * When possible, we try to satisfy a readdir by peeking at the 106 * dcache. We make this work by carefully ordering dentries on 107 * d_u.d_child when we initially get results back from the MDS, and 108 * falling back to a "normal" sync readdir if any dentries in the dir 109 * are dropped. 110 * 111 * D_COMPLETE tells indicates we have all dentries in the dir. 
It is defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
 * the MDS if/when the directory is modified).
 *
 * Walks parent->d_subdirs backwards (following ->prev) and hands each
 * usable child dentry to @filldir.  Returns err as last set: 0 if
 * nothing was emitted, otherwise the most recent filldir result, or
 * -EAGAIN when ceph_dir_test_complete() reports that the directory is
 * no longer complete (the caller then falls back to the MDS).
 */
static int __dcache_readdir(struct file *filp,
			    void *dirent, filldir_t filldir)
{
	struct ceph_file_info *fi = filp->private_data;
	struct dentry *parent = filp->f_dentry;
	struct inode *dir = parent->d_inode;
	struct list_head *p;
	struct dentry *dentry, *last;
	struct ceph_dentry_info *di;
	int err = 0;

	/* claim ref on last dentry we returned */
	last = fi->dentry;
	fi->dentry = NULL;

	dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos,
	     last);

	spin_lock(&parent->d_lock);

	/* start at beginning? */
	if (filp->f_pos == 2 || last == NULL ||
	    filp->f_pos < ceph_dentry(last)->offset) {
		if (list_empty(&parent->d_subdirs))
			goto out_unlock;
		/* iteration runs tail -> head, so begin at the list tail */
		p = parent->d_subdirs.prev;
		dout(" initial p %p/%p\n", p->prev, p->next);
	} else {
		/* resume with the entry following the one returned last */
		p = last->d_u.d_child.prev;
	}

more:
	dentry = list_entry(p, struct dentry, d_u.d_child);
	di = ceph_dentry(dentry);
	/* skip forward until we find an entry worth returning */
	while (1) {
		dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
		     d_unhashed(dentry) ? "!hashed" : "hashed",
		     parent->d_subdirs.prev, parent->d_subdirs.next);
		/* wrapped around to the list head: nothing left */
		if (p == &parent->d_subdirs) {
			fi->flags |= CEPH_F_ATEND;
			goto out_unlock;
		}
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		/*
		 * usable entry: hashed, positive, not the snapdir or the
		 * CEPH_INO_CEPH inode, and at or beyond our position.
		 * We leave the loop with dentry->d_lock still held.
		 */
		if (!d_unhashed(dentry) && dentry->d_inode &&
		    ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
		    ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
		    filp->f_pos <= di->offset)
			break;
		dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry,
		     dentry->d_name.len, dentry->d_name.name, di->offset,
		     filp->f_pos, d_unhashed(dentry) ? " unhashed" : "",
		     !dentry->d_inode ? " null" : "");
		spin_unlock(&dentry->d_lock);
		p = p->prev;
		dentry = list_entry(p, struct dentry, d_u.d_child);
		di = ceph_dentry(dentry);
	}

	/* pin the dentry, then drop both locks before calling filldir */
	dget_dlock(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&parent->d_lock);

	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
	     dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
	filp->f_pos = di->offset;
	err = filldir(dirent, dentry->d_name.name,
		      dentry->d_name.len, di->offset,
		      ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
		      dentry->d_inode->i_mode >> 12);

	if (last) {
		if (err < 0) {
			/* remember our position */
			fi->dentry = last;
			fi->next_offset = di->offset;
		} else {
			dput(last);
		}
	}
	/* the reference taken by dget_dlock() above now lives in 'last' */
	last = dentry;

	if (err < 0)
		goto out;

	filp->f_pos++;

	/* make sure a dentry wasn't dropped while we didn't have parent lock */
	if (!ceph_dir_test_complete(dir)) {
		dout(" lost D_COMPLETE on %p; falling back to mds\n", dir);
		err = -EAGAIN;
		goto out;
	}

	spin_lock(&parent->d_lock);
	p = p->prev;	/* advance to next dentry */
	goto more;

out_unlock:
	spin_unlock(&parent->d_lock);
out:
	if (last)
		dput(last);
	return err;
}

/*
 * make note of the last dentry we read, so we can
 * continue at the same lexicographical point,
 * regardless of what dir changes take place on the
 * server.
225 */ 226 static int note_last_dentry(struct ceph_file_info *fi, const char *name, 227 int len) 228 { 229 kfree(fi->last_name); 230 fi->last_name = kmalloc(len+1, GFP_NOFS); 231 if (!fi->last_name) 232 return -ENOMEM; 233 memcpy(fi->last_name, name, len); 234 fi->last_name[len] = 0; 235 dout("note_last_dentry '%s'\n", fi->last_name); 236 return 0; 237 } 238 239 static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) 240 { 241 struct ceph_file_info *fi = filp->private_data; 242 struct inode *inode = filp->f_dentry->d_inode; 243 struct ceph_inode_info *ci = ceph_inode(inode); 244 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 245 struct ceph_mds_client *mdsc = fsc->mdsc; 246 unsigned frag = fpos_frag(filp->f_pos); 247 int off = fpos_off(filp->f_pos); 248 int err; 249 u32 ftype; 250 struct ceph_mds_reply_info_parsed *rinfo; 251 const int max_entries = fsc->mount_options->max_readdir; 252 const int max_bytes = fsc->mount_options->max_readdir_bytes; 253 254 dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); 255 if (fi->flags & CEPH_F_ATEND) 256 return 0; 257 258 /* always start with . and .. */ 259 if (filp->f_pos == 0) { 260 /* note dir version at start of readdir so we can tell 261 * if any dentries get dropped */ 262 fi->dir_release_count = ci->i_release_count; 263 264 dout("readdir off 0 -> '.'\n"); 265 if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0), 266 ceph_translate_ino(inode->i_sb, inode->i_ino), 267 inode->i_mode >> 12) < 0) 268 return 0; 269 filp->f_pos = 1; 270 off = 1; 271 } 272 if (filp->f_pos == 1) { 273 ino_t ino = parent_ino(filp->f_dentry); 274 dout("readdir off 1 -> '..'\n"); 275 if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1), 276 ceph_translate_ino(inode->i_sb, ino), 277 inode->i_mode >> 12) < 0) 278 return 0; 279 filp->f_pos = 2; 280 off = 2; 281 } 282 283 /* can we use the dcache? 
*/ 284 spin_lock(&ci->i_ceph_lock); 285 if ((filp->f_pos == 2 || fi->dentry) && 286 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && 287 ceph_snap(inode) != CEPH_SNAPDIR && 288 ceph_dir_test_complete(inode) && 289 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { 290 spin_unlock(&ci->i_ceph_lock); 291 err = __dcache_readdir(filp, dirent, filldir); 292 if (err != -EAGAIN) 293 return err; 294 } else { 295 spin_unlock(&ci->i_ceph_lock); 296 } 297 if (fi->dentry) { 298 err = note_last_dentry(fi, fi->dentry->d_name.name, 299 fi->dentry->d_name.len); 300 if (err) 301 return err; 302 dput(fi->dentry); 303 fi->dentry = NULL; 304 } 305 306 /* proceed with a normal readdir */ 307 308 more: 309 /* do we have the correct frag content buffered? */ 310 if (fi->frag != frag || fi->last_readdir == NULL) { 311 struct ceph_mds_request *req; 312 int op = ceph_snap(inode) == CEPH_SNAPDIR ? 313 CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; 314 315 /* discard old result, if any */ 316 if (fi->last_readdir) { 317 ceph_mdsc_put_request(fi->last_readdir); 318 fi->last_readdir = NULL; 319 } 320 321 /* requery frag tree, as the frag topology may have changed */ 322 frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL); 323 324 dout("readdir fetching %llx.%llx frag %x offset '%s'\n", 325 ceph_vinop(inode), frag, fi->last_name); 326 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 327 if (IS_ERR(req)) 328 return PTR_ERR(req); 329 req->r_inode = inode; 330 ihold(inode); 331 req->r_dentry = dget(filp->f_dentry); 332 /* hints to request -> mds selection code */ 333 req->r_direct_mode = USE_AUTH_MDS; 334 req->r_direct_hash = ceph_frag_value(frag); 335 req->r_direct_is_hash = true; 336 req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); 337 req->r_readdir_offset = fi->next_offset; 338 req->r_args.readdir.frag = cpu_to_le32(frag); 339 req->r_args.readdir.max_entries = cpu_to_le32(max_entries); 340 req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes); 341 req->r_num_caps = max_entries + 1; 
342 err = ceph_mdsc_do_request(mdsc, NULL, req); 343 if (err < 0) { 344 ceph_mdsc_put_request(req); 345 return err; 346 } 347 dout("readdir got and parsed readdir result=%d" 348 " on frag %x, end=%d, complete=%d\n", err, frag, 349 (int)req->r_reply_info.dir_end, 350 (int)req->r_reply_info.dir_complete); 351 352 if (!req->r_did_prepopulate) { 353 dout("readdir !did_prepopulate"); 354 fi->dir_release_count--; /* preclude D_COMPLETE */ 355 } 356 357 /* note next offset and last dentry name */ 358 fi->offset = fi->next_offset; 359 fi->last_readdir = req; 360 361 if (req->r_reply_info.dir_end) { 362 kfree(fi->last_name); 363 fi->last_name = NULL; 364 if (ceph_frag_is_rightmost(frag)) 365 fi->next_offset = 2; 366 else 367 fi->next_offset = 0; 368 } else { 369 rinfo = &req->r_reply_info; 370 err = note_last_dentry(fi, 371 rinfo->dir_dname[rinfo->dir_nr-1], 372 rinfo->dir_dname_len[rinfo->dir_nr-1]); 373 if (err) 374 return err; 375 fi->next_offset += rinfo->dir_nr; 376 } 377 } 378 379 rinfo = &fi->last_readdir->r_reply_info; 380 dout("readdir frag %x num %d off %d chunkoff %d\n", frag, 381 rinfo->dir_nr, off, fi->offset); 382 while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) { 383 u64 pos = ceph_make_fpos(frag, off); 384 struct ceph_mds_reply_inode *in = 385 rinfo->dir_in[off - fi->offset].in; 386 struct ceph_vino vino; 387 ino_t ino; 388 389 dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", 390 off, off - fi->offset, rinfo->dir_nr, pos, 391 rinfo->dir_dname_len[off - fi->offset], 392 rinfo->dir_dname[off - fi->offset], in); 393 BUG_ON(!in); 394 ftype = le32_to_cpu(in->mode) >> 12; 395 vino.ino = le64_to_cpu(in->ino); 396 vino.snap = le64_to_cpu(in->snapid); 397 ino = ceph_vino_to_ino(vino); 398 if (filldir(dirent, 399 rinfo->dir_dname[off - fi->offset], 400 rinfo->dir_dname_len[off - fi->offset], 401 pos, 402 ceph_translate_ino(inode->i_sb, ino), ftype) < 0) { 403 dout("filldir stopping us...\n"); 404 return 0; 405 } 406 off++; 407 filp->f_pos = pos + 1; 408 
} 409 410 if (fi->last_name) { 411 ceph_mdsc_put_request(fi->last_readdir); 412 fi->last_readdir = NULL; 413 goto more; 414 } 415 416 /* more frags? */ 417 if (!ceph_frag_is_rightmost(frag)) { 418 frag = ceph_frag_next(frag); 419 off = 0; 420 filp->f_pos = ceph_make_fpos(frag, off); 421 dout("readdir next frag is %x\n", frag); 422 goto more; 423 } 424 fi->flags |= CEPH_F_ATEND; 425 426 /* 427 * if dir_release_count still matches the dir, no dentries 428 * were released during the whole readdir, and we should have 429 * the complete dir contents in our cache. 430 */ 431 spin_lock(&ci->i_ceph_lock); 432 if (ci->i_release_count == fi->dir_release_count) { 433 ceph_dir_set_complete(inode); 434 ci->i_max_offset = filp->f_pos; 435 } 436 spin_unlock(&ci->i_ceph_lock); 437 438 dout("readdir %p filp %p done.\n", inode, filp); 439 return 0; 440 } 441 442 static void reset_readdir(struct ceph_file_info *fi) 443 { 444 if (fi->last_readdir) { 445 ceph_mdsc_put_request(fi->last_readdir); 446 fi->last_readdir = NULL; 447 } 448 kfree(fi->last_name); 449 fi->last_name = NULL; 450 fi->next_offset = 2; /* compensate for . and .. 
*/ 451 if (fi->dentry) { 452 dput(fi->dentry); 453 fi->dentry = NULL; 454 } 455 fi->flags &= ~CEPH_F_ATEND; 456 } 457 458 static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) 459 { 460 struct ceph_file_info *fi = file->private_data; 461 struct inode *inode = file->f_mapping->host; 462 loff_t old_offset = offset; 463 loff_t retval; 464 465 mutex_lock(&inode->i_mutex); 466 retval = -EINVAL; 467 switch (origin) { 468 case SEEK_END: 469 offset += inode->i_size + 2; /* FIXME */ 470 break; 471 case SEEK_CUR: 472 offset += file->f_pos; 473 case SEEK_SET: 474 break; 475 default: 476 goto out; 477 } 478 479 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { 480 if (offset != file->f_pos) { 481 file->f_pos = offset; 482 file->f_version = 0; 483 fi->flags &= ~CEPH_F_ATEND; 484 } 485 retval = offset; 486 487 /* 488 * discard buffered readdir content on seekdir(0), or 489 * seek to new frag, or seek prior to current chunk. 490 */ 491 if (offset == 0 || 492 fpos_frag(offset) != fpos_frag(old_offset) || 493 fpos_off(offset) < fi->offset) { 494 dout("dir_llseek dropping %p content\n", file); 495 reset_readdir(fi); 496 } 497 498 /* bump dir_release_count if we did a forward seek */ 499 if (offset > old_offset) 500 fi->dir_release_count--; 501 } 502 out: 503 mutex_unlock(&inode->i_mutex); 504 return retval; 505 } 506 507 /* 508 * Handle lookups for the hidden .snap directory. 509 */ 510 int ceph_handle_snapdir(struct ceph_mds_request *req, 511 struct dentry *dentry, int err) 512 { 513 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 514 struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */ 515 516 /* .snap dir? 
*/ 517 if (err == -ENOENT && 518 ceph_snap(parent) == CEPH_NOSNAP && 519 strcmp(dentry->d_name.name, 520 fsc->mount_options->snapdir_name) == 0) { 521 struct inode *inode = ceph_get_snapdir(parent); 522 dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", 523 dentry, dentry->d_name.len, dentry->d_name.name, inode); 524 BUG_ON(!d_unhashed(dentry)); 525 d_add(dentry, inode); 526 err = 0; 527 } 528 return err; 529 } 530 531 /* 532 * Figure out final result of a lookup/open request. 533 * 534 * Mainly, make sure we return the final req->r_dentry (if it already 535 * existed) in place of the original VFS-provided dentry when they 536 * differ. 537 * 538 * Gracefully handle the case where the MDS replies with -ENOENT and 539 * no trace (which it may do, at its discretion, e.g., if it doesn't 540 * care to issue a lease on the negative dentry). 541 */ 542 struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, 543 struct dentry *dentry, int err) 544 { 545 if (err == -ENOENT) { 546 /* no trace? */ 547 err = 0; 548 if (!req->r_reply_info.head->is_dentry) { 549 dout("ENOENT and no trace, dentry %p inode %p\n", 550 dentry, dentry->d_inode); 551 if (dentry->d_inode) { 552 d_drop(dentry); 553 err = -ENOENT; 554 } else { 555 d_add(dentry, NULL); 556 } 557 } 558 } 559 if (err) 560 dentry = ERR_PTR(err); 561 else if (dentry != req->r_dentry) 562 dentry = dget(req->r_dentry); /* we got spliced */ 563 else 564 dentry = NULL; 565 return dentry; 566 } 567 568 static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) 569 { 570 return ceph_ino(inode) == CEPH_INO_ROOT && 571 strncmp(dentry->d_name.name, ".ceph", 5) == 0; 572 } 573 574 /* 575 * Look up a single dir entry. If there is a lookup intent, inform 576 * the MDS so that it gets our 'caps wanted' value in a single op. 
577 */ 578 static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, 579 struct nameidata *nd) 580 { 581 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 582 struct ceph_mds_client *mdsc = fsc->mdsc; 583 struct ceph_mds_request *req; 584 int op; 585 int err; 586 587 dout("lookup %p dentry %p '%.*s'\n", 588 dir, dentry, dentry->d_name.len, dentry->d_name.name); 589 590 if (dentry->d_name.len > NAME_MAX) 591 return ERR_PTR(-ENAMETOOLONG); 592 593 err = ceph_init_dentry(dentry); 594 if (err < 0) 595 return ERR_PTR(err); 596 597 /* can we conclude ENOENT locally? */ 598 if (dentry->d_inode == NULL) { 599 struct ceph_inode_info *ci = ceph_inode(dir); 600 struct ceph_dentry_info *di = ceph_dentry(dentry); 601 602 spin_lock(&ci->i_ceph_lock); 603 dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); 604 if (strncmp(dentry->d_name.name, 605 fsc->mount_options->snapdir_name, 606 dentry->d_name.len) && 607 !is_root_ceph_dentry(dir, dentry) && 608 ceph_dir_test_complete(dir) && 609 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { 610 spin_unlock(&ci->i_ceph_lock); 611 dout(" dir %p complete, -ENOENT\n", dir); 612 d_add(dentry, NULL); 613 di->lease_shared_gen = ci->i_shared_gen; 614 return NULL; 615 } 616 spin_unlock(&ci->i_ceph_lock); 617 } 618 619 op = ceph_snap(dir) == CEPH_SNAPDIR ? 
620 CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; 621 req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); 622 if (IS_ERR(req)) 623 return ERR_CAST(req); 624 req->r_dentry = dget(dentry); 625 req->r_num_caps = 2; 626 /* we only need inode linkage */ 627 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 628 req->r_locked_dir = dir; 629 err = ceph_mdsc_do_request(mdsc, NULL, req); 630 err = ceph_handle_snapdir(req, dentry, err); 631 dentry = ceph_finish_lookup(req, dentry, err); 632 ceph_mdsc_put_request(req); /* will dput(dentry) */ 633 dout("lookup result=%p\n", dentry); 634 return dentry; 635 } 636 637 int ceph_atomic_open(struct inode *dir, struct dentry *dentry, 638 struct opendata *od, unsigned flags, umode_t mode, 639 int *opened) 640 { 641 int err; 642 struct dentry *res = NULL; 643 644 if (!(flags & O_CREAT)) { 645 if (dentry->d_name.len > NAME_MAX) 646 return -ENAMETOOLONG; 647 648 err = ceph_init_dentry(dentry); 649 if (err < 0) 650 return err; 651 652 return ceph_lookup_open(dir, dentry, od, flags, mode, opened); 653 } 654 655 if (d_unhashed(dentry)) { 656 res = ceph_lookup(dir, dentry, NULL); 657 if (IS_ERR(res)) 658 return PTR_ERR(res); 659 660 if (res) 661 dentry = res; 662 } 663 664 /* We don't deal with positive dentries here */ 665 if (dentry->d_inode) { 666 finish_no_open(od, res); 667 return 1; 668 } 669 670 *opened |= FILE_CREATED; 671 err = ceph_lookup_open(dir, dentry, od, flags, mode, opened); 672 dput(res); 673 674 return err; 675 } 676 677 /* 678 * If we do a create but get no trace back from the MDS, follow up with 679 * a lookup (the VFS expects us to link up the provided dentry). 
680 */ 681 int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) 682 { 683 struct dentry *result = ceph_lookup(dir, dentry, NULL); 684 685 if (result && !IS_ERR(result)) { 686 /* 687 * We created the item, then did a lookup, and found 688 * it was already linked to another inode we already 689 * had in our cache (and thus got spliced). Link our 690 * dentry to that inode, but don't hash it, just in 691 * case the VFS wants to dereference it. 692 */ 693 BUG_ON(!result->d_inode); 694 d_instantiate(dentry, result->d_inode); 695 return 0; 696 } 697 return PTR_ERR(result); 698 } 699 700 static int ceph_mknod(struct inode *dir, struct dentry *dentry, 701 umode_t mode, dev_t rdev) 702 { 703 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 704 struct ceph_mds_client *mdsc = fsc->mdsc; 705 struct ceph_mds_request *req; 706 int err; 707 708 if (ceph_snap(dir) != CEPH_NOSNAP) 709 return -EROFS; 710 711 dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n", 712 dir, dentry, mode, rdev); 713 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS); 714 if (IS_ERR(req)) { 715 d_drop(dentry); 716 return PTR_ERR(req); 717 } 718 req->r_dentry = dget(dentry); 719 req->r_num_caps = 2; 720 req->r_locked_dir = dir; 721 req->r_args.mknod.mode = cpu_to_le32(mode); 722 req->r_args.mknod.rdev = cpu_to_le32(rdev); 723 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 724 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 725 err = ceph_mdsc_do_request(mdsc, dir, req); 726 if (!err && !req->r_reply_info.head->is_dentry) 727 err = ceph_handle_notrace_create(dir, dentry); 728 ceph_mdsc_put_request(req); 729 if (err) 730 d_drop(dentry); 731 return err; 732 } 733 734 static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode, 735 struct nameidata *nd) 736 { 737 return ceph_mknod(dir, dentry, mode, 0); 738 } 739 740 static int ceph_symlink(struct inode *dir, struct dentry *dentry, 741 const char *dest) 742 { 743 struct ceph_fs_client *fsc = 
ceph_sb_to_client(dir->i_sb); 744 struct ceph_mds_client *mdsc = fsc->mdsc; 745 struct ceph_mds_request *req; 746 int err; 747 748 if (ceph_snap(dir) != CEPH_NOSNAP) 749 return -EROFS; 750 751 dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); 752 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); 753 if (IS_ERR(req)) { 754 d_drop(dentry); 755 return PTR_ERR(req); 756 } 757 req->r_dentry = dget(dentry); 758 req->r_num_caps = 2; 759 req->r_path2 = kstrdup(dest, GFP_NOFS); 760 req->r_locked_dir = dir; 761 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 762 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 763 err = ceph_mdsc_do_request(mdsc, dir, req); 764 if (!err && !req->r_reply_info.head->is_dentry) 765 err = ceph_handle_notrace_create(dir, dentry); 766 ceph_mdsc_put_request(req); 767 if (err) 768 d_drop(dentry); 769 return err; 770 } 771 772 static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 773 { 774 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 775 struct ceph_mds_client *mdsc = fsc->mdsc; 776 struct ceph_mds_request *req; 777 int err = -EROFS; 778 int op; 779 780 if (ceph_snap(dir) == CEPH_SNAPDIR) { 781 /* mkdir .snap/foo is a MKSNAP */ 782 op = CEPH_MDS_OP_MKSNAP; 783 dout("mksnap dir %p snap '%.*s' dn %p\n", dir, 784 dentry->d_name.len, dentry->d_name.name, dentry); 785 } else if (ceph_snap(dir) == CEPH_NOSNAP) { 786 dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode); 787 op = CEPH_MDS_OP_MKDIR; 788 } else { 789 goto out; 790 } 791 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 792 if (IS_ERR(req)) { 793 err = PTR_ERR(req); 794 goto out; 795 } 796 797 req->r_dentry = dget(dentry); 798 req->r_num_caps = 2; 799 req->r_locked_dir = dir; 800 req->r_args.mkdir.mode = cpu_to_le32(mode); 801 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 802 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 803 err = ceph_mdsc_do_request(mdsc, dir, req); 804 if (!err && !req->r_reply_info.head->is_dentry) 805 
err = ceph_handle_notrace_create(dir, dentry); 806 ceph_mdsc_put_request(req); 807 out: 808 if (err < 0) 809 d_drop(dentry); 810 return err; 811 } 812 813 static int ceph_link(struct dentry *old_dentry, struct inode *dir, 814 struct dentry *dentry) 815 { 816 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 817 struct ceph_mds_client *mdsc = fsc->mdsc; 818 struct ceph_mds_request *req; 819 int err; 820 821 if (ceph_snap(dir) != CEPH_NOSNAP) 822 return -EROFS; 823 824 dout("link in dir %p old_dentry %p dentry %p\n", dir, 825 old_dentry, dentry); 826 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS); 827 if (IS_ERR(req)) { 828 d_drop(dentry); 829 return PTR_ERR(req); 830 } 831 req->r_dentry = dget(dentry); 832 req->r_num_caps = 2; 833 req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */ 834 req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); 835 req->r_locked_dir = dir; 836 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 837 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 838 err = ceph_mdsc_do_request(mdsc, dir, req); 839 if (err) { 840 d_drop(dentry); 841 } else if (!req->r_reply_info.head->is_dentry) { 842 ihold(old_dentry->d_inode); 843 d_instantiate(dentry, old_dentry->d_inode); 844 } 845 ceph_mdsc_put_request(req); 846 return err; 847 } 848 849 /* 850 * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it 851 * looks like the link count will hit 0, drop any other caps (other 852 * than PIN) we don't specifically want (due to the file still being 853 * open). 
854 */ 855 static int drop_caps_for_unlink(struct inode *inode) 856 { 857 struct ceph_inode_info *ci = ceph_inode(inode); 858 int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; 859 860 spin_lock(&ci->i_ceph_lock); 861 if (inode->i_nlink == 1) { 862 drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN); 863 ci->i_ceph_flags |= CEPH_I_NODELAY; 864 } 865 spin_unlock(&ci->i_ceph_lock); 866 return drop; 867 } 868 869 /* 870 * rmdir and unlink are differ only by the metadata op code 871 */ 872 static int ceph_unlink(struct inode *dir, struct dentry *dentry) 873 { 874 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 875 struct ceph_mds_client *mdsc = fsc->mdsc; 876 struct inode *inode = dentry->d_inode; 877 struct ceph_mds_request *req; 878 int err = -EROFS; 879 int op; 880 881 if (ceph_snap(dir) == CEPH_SNAPDIR) { 882 /* rmdir .snap/foo is RMSNAP */ 883 dout("rmsnap dir %p '%.*s' dn %p\n", dir, dentry->d_name.len, 884 dentry->d_name.name, dentry); 885 op = CEPH_MDS_OP_RMSNAP; 886 } else if (ceph_snap(dir) == CEPH_NOSNAP) { 887 dout("unlink/rmdir dir %p dn %p inode %p\n", 888 dir, dentry, inode); 889 op = S_ISDIR(dentry->d_inode->i_mode) ? 
890 CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; 891 } else 892 goto out; 893 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 894 if (IS_ERR(req)) { 895 err = PTR_ERR(req); 896 goto out; 897 } 898 req->r_dentry = dget(dentry); 899 req->r_num_caps = 2; 900 req->r_locked_dir = dir; 901 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 902 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 903 req->r_inode_drop = drop_caps_for_unlink(inode); 904 err = ceph_mdsc_do_request(mdsc, dir, req); 905 if (!err && !req->r_reply_info.head->is_dentry) 906 d_delete(dentry); 907 ceph_mdsc_put_request(req); 908 out: 909 return err; 910 } 911 912 static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, 913 struct inode *new_dir, struct dentry *new_dentry) 914 { 915 struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); 916 struct ceph_mds_client *mdsc = fsc->mdsc; 917 struct ceph_mds_request *req; 918 int err; 919 920 if (ceph_snap(old_dir) != ceph_snap(new_dir)) 921 return -EXDEV; 922 if (ceph_snap(old_dir) != CEPH_NOSNAP || 923 ceph_snap(new_dir) != CEPH_NOSNAP) 924 return -EROFS; 925 dout("rename dir %p dentry %p to dir %p dentry %p\n", 926 old_dir, old_dentry, new_dir, new_dentry); 927 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS); 928 if (IS_ERR(req)) 929 return PTR_ERR(req); 930 req->r_dentry = dget(new_dentry); 931 req->r_num_caps = 2; 932 req->r_old_dentry = dget(old_dentry); 933 req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); 934 req->r_locked_dir = new_dir; 935 req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; 936 req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; 937 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 938 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 939 /* release LINK_RDCACHE on source inode (mds will lock it) */ 940 req->r_old_inode_drop = CEPH_CAP_LINK_SHARED; 941 if (new_dentry->d_inode) 942 req->r_inode_drop = drop_caps_for_unlink(new_dentry->d_inode); 943 err = ceph_mdsc_do_request(mdsc, old_dir, req); 944 if 
(!err && !req->r_reply_info.head->is_dentry) { 945 /* 946 * Normally d_move() is done by fill_trace (called by 947 * do_request, above). If there is no trace, we need 948 * to do it here. 949 */ 950 951 /* d_move screws up d_subdirs order */ 952 ceph_dir_clear_complete(new_dir); 953 954 d_move(old_dentry, new_dentry); 955 956 /* ensure target dentry is invalidated, despite 957 rehashing bug in vfs_rename_dir */ 958 ceph_invalidate_dentry_lease(new_dentry); 959 } 960 ceph_mdsc_put_request(req); 961 return err; 962 } 963 964 /* 965 * Ensure a dentry lease will no longer revalidate. 966 */ 967 void ceph_invalidate_dentry_lease(struct dentry *dentry) 968 { 969 spin_lock(&dentry->d_lock); 970 dentry->d_time = jiffies; 971 ceph_dentry(dentry)->lease_shared_gen = 0; 972 spin_unlock(&dentry->d_lock); 973 } 974 975 /* 976 * Check if dentry lease is valid. If not, delete the lease. Try to 977 * renew if the least is more than half up. 978 */ 979 static int dentry_lease_is_valid(struct dentry *dentry) 980 { 981 struct ceph_dentry_info *di; 982 struct ceph_mds_session *s; 983 int valid = 0; 984 u32 gen; 985 unsigned long ttl; 986 struct ceph_mds_session *session = NULL; 987 struct inode *dir = NULL; 988 u32 seq = 0; 989 990 spin_lock(&dentry->d_lock); 991 di = ceph_dentry(dentry); 992 if (di->lease_session) { 993 s = di->lease_session; 994 spin_lock(&s->s_gen_ttl_lock); 995 gen = s->s_cap_gen; 996 ttl = s->s_cap_ttl; 997 spin_unlock(&s->s_gen_ttl_lock); 998 999 if (di->lease_gen == gen && 1000 time_before(jiffies, dentry->d_time) && 1001 time_before(jiffies, ttl)) { 1002 valid = 1; 1003 if (di->lease_renew_after && 1004 time_after(jiffies, di->lease_renew_after)) { 1005 /* we should renew */ 1006 dir = dentry->d_parent->d_inode; 1007 session = ceph_get_mds_session(s); 1008 seq = di->lease_seq; 1009 di->lease_renew_after = 0; 1010 di->lease_renew_from = jiffies; 1011 } 1012 } 1013 } 1014 spin_unlock(&dentry->d_lock); 1015 1016 if (session) { 1017 
ceph_mdsc_lease_send_msg(session, dir, dentry, 1018 CEPH_MDS_LEASE_RENEW, seq); 1019 ceph_put_mds_session(session); 1020 } 1021 dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid); 1022 return valid; 1023 } 1024 1025 /* 1026 * Check if directory-wide content lease/cap is valid. 1027 */ 1028 static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) 1029 { 1030 struct ceph_inode_info *ci = ceph_inode(dir); 1031 struct ceph_dentry_info *di = ceph_dentry(dentry); 1032 int valid = 0; 1033 1034 spin_lock(&ci->i_ceph_lock); 1035 if (ci->i_shared_gen == di->lease_shared_gen) 1036 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); 1037 spin_unlock(&ci->i_ceph_lock); 1038 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", 1039 dir, (unsigned)ci->i_shared_gen, dentry, 1040 (unsigned)di->lease_shared_gen, valid); 1041 return valid; 1042 } 1043 1044 /* 1045 * Check if cached dentry can be trusted. 1046 */ 1047 static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) 1048 { 1049 int valid = 0; 1050 struct inode *dir; 1051 1052 if (nd && nd->flags & LOOKUP_RCU) 1053 return -ECHILD; 1054 1055 dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, 1056 dentry->d_name.len, dentry->d_name.name, dentry->d_inode, 1057 ceph_dentry(dentry)->offset); 1058 1059 dir = ceph_get_dentry_parent_inode(dentry); 1060 1061 /* always trust cached snapped dentries, snapdir dentry */ 1062 if (ceph_snap(dir) != CEPH_NOSNAP) { 1063 dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry, 1064 dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 1065 valid = 1; 1066 } else if (dentry->d_inode && 1067 ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) { 1068 valid = 1; 1069 } else if (dentry_lease_is_valid(dentry) || 1070 dir_lease_is_valid(dir, dentry)) { 1071 valid = 1; 1072 } 1073 1074 dout("d_revalidate %p %s\n", dentry, valid ? 
"valid" : "invalid"); 1075 if (valid) 1076 ceph_dentry_lru_touch(dentry); 1077 else 1078 d_drop(dentry); 1079 iput(dir); 1080 return valid; 1081 } 1082 1083 /* 1084 * Release our ceph_dentry_info. 1085 */ 1086 static void ceph_d_release(struct dentry *dentry) 1087 { 1088 struct ceph_dentry_info *di = ceph_dentry(dentry); 1089 1090 dout("d_release %p\n", dentry); 1091 ceph_dentry_lru_del(dentry); 1092 if (di->lease_session) 1093 ceph_put_mds_session(di->lease_session); 1094 kmem_cache_free(ceph_dentry_cachep, di); 1095 dentry->d_fsdata = NULL; 1096 } 1097 1098 static int ceph_snapdir_d_revalidate(struct dentry *dentry, 1099 struct nameidata *nd) 1100 { 1101 /* 1102 * Eventually, we'll want to revalidate snapped metadata 1103 * too... probably... 1104 */ 1105 return 1; 1106 } 1107 1108 /* 1109 * Set/clear/test dir complete flag on the dir's dentry. 1110 */ 1111 void ceph_dir_set_complete(struct inode *inode) 1112 { 1113 struct dentry *dentry = d_find_any_alias(inode); 1114 1115 if (dentry && ceph_dentry(dentry) && 1116 ceph_test_mount_opt(ceph_sb_to_client(dentry->d_sb), DCACHE)) { 1117 dout(" marking %p (%p) complete\n", inode, dentry); 1118 set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); 1119 } 1120 dput(dentry); 1121 } 1122 1123 void ceph_dir_clear_complete(struct inode *inode) 1124 { 1125 struct dentry *dentry = d_find_any_alias(inode); 1126 1127 if (dentry && ceph_dentry(dentry)) { 1128 dout(" marking %p (%p) complete\n", inode, dentry); 1129 set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); 1130 } 1131 dput(dentry); 1132 } 1133 1134 bool ceph_dir_test_complete(struct inode *inode) 1135 { 1136 struct dentry *dentry = d_find_any_alias(inode); 1137 1138 if (dentry && ceph_dentry(dentry)) { 1139 dout(" marking %p (%p) NOT complete\n", inode, dentry); 1140 clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); 1141 } 1142 dput(dentry); 1143 return false; 1144 } 1145 1146 /* 1147 * When the VFS prunes a dentry from the cache, we need to clear the 1148 * 
complete flag on the parent directory. 1149 * 1150 * Called under dentry->d_lock. 1151 */ 1152 static void ceph_d_prune(struct dentry *dentry) 1153 { 1154 struct ceph_dentry_info *di; 1155 1156 dout("ceph_d_prune %p\n", dentry); 1157 1158 /* do we have a valid parent? */ 1159 if (!dentry->d_parent || IS_ROOT(dentry)) 1160 return; 1161 1162 /* if we are not hashed, we don't affect D_COMPLETE */ 1163 if (d_unhashed(dentry)) 1164 return; 1165 1166 /* 1167 * we hold d_lock, so d_parent is stable, and d_fsdata is never 1168 * cleared until d_release 1169 */ 1170 di = ceph_dentry(dentry->d_parent); 1171 clear_bit(CEPH_D_COMPLETE, &di->flags); 1172 } 1173 1174 /* 1175 * read() on a dir. This weird interface hack only works if mounted 1176 * with '-o dirstat'. 1177 */ 1178 static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, 1179 loff_t *ppos) 1180 { 1181 struct ceph_file_info *cf = file->private_data; 1182 struct inode *inode = file->f_dentry->d_inode; 1183 struct ceph_inode_info *ci = ceph_inode(inode); 1184 int left; 1185 const int bufsize = 1024; 1186 1187 if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) 1188 return -EISDIR; 1189 1190 if (!cf->dir_info) { 1191 cf->dir_info = kmalloc(bufsize, GFP_NOFS); 1192 if (!cf->dir_info) 1193 return -ENOMEM; 1194 cf->dir_info_len = 1195 snprintf(cf->dir_info, bufsize, 1196 "entries: %20lld\n" 1197 " files: %20lld\n" 1198 " subdirs: %20lld\n" 1199 "rentries: %20lld\n" 1200 " rfiles: %20lld\n" 1201 " rsubdirs: %20lld\n" 1202 "rbytes: %20lld\n" 1203 "rctime: %10ld.%09ld\n", 1204 ci->i_files + ci->i_subdirs, 1205 ci->i_files, 1206 ci->i_subdirs, 1207 ci->i_rfiles + ci->i_rsubdirs, 1208 ci->i_rfiles, 1209 ci->i_rsubdirs, 1210 ci->i_rbytes, 1211 (long)ci->i_rctime.tv_sec, 1212 (long)ci->i_rctime.tv_nsec); 1213 } 1214 1215 if (*ppos >= cf->dir_info_len) 1216 return 0; 1217 size = min_t(unsigned, size, cf->dir_info_len-*ppos); 1218 left = copy_to_user(buf, cf->dir_info + *ppos, size); 1219 if 
(left == size) 1220 return -EFAULT; 1221 *ppos += (size - left); 1222 return size - left; 1223 } 1224 1225 /* 1226 * an fsync() on a dir will wait for any uncommitted directory 1227 * operations to commit. 1228 */ 1229 static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, 1230 int datasync) 1231 { 1232 struct inode *inode = file->f_path.dentry->d_inode; 1233 struct ceph_inode_info *ci = ceph_inode(inode); 1234 struct list_head *head = &ci->i_unsafe_dirops; 1235 struct ceph_mds_request *req; 1236 u64 last_tid; 1237 int ret = 0; 1238 1239 dout("dir_fsync %p\n", inode); 1240 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 1241 if (ret) 1242 return ret; 1243 mutex_lock(&inode->i_mutex); 1244 1245 spin_lock(&ci->i_unsafe_lock); 1246 if (list_empty(head)) 1247 goto out; 1248 1249 req = list_entry(head->prev, 1250 struct ceph_mds_request, r_unsafe_dir_item); 1251 last_tid = req->r_tid; 1252 1253 do { 1254 ceph_mdsc_get_request(req); 1255 spin_unlock(&ci->i_unsafe_lock); 1256 1257 dout("dir_fsync %p wait on tid %llu (until %llu)\n", 1258 inode, req->r_tid, last_tid); 1259 if (req->r_timeout) { 1260 ret = wait_for_completion_timeout( 1261 &req->r_safe_completion, req->r_timeout); 1262 if (ret > 0) 1263 ret = 0; 1264 else if (ret == 0) 1265 ret = -EIO; /* timed out */ 1266 } else { 1267 wait_for_completion(&req->r_safe_completion); 1268 } 1269 ceph_mdsc_put_request(req); 1270 1271 spin_lock(&ci->i_unsafe_lock); 1272 if (ret || list_empty(head)) 1273 break; 1274 req = list_entry(head->next, 1275 struct ceph_mds_request, r_unsafe_dir_item); 1276 } while (req->r_tid < last_tid); 1277 out: 1278 spin_unlock(&ci->i_unsafe_lock); 1279 mutex_unlock(&inode->i_mutex); 1280 1281 return ret; 1282 } 1283 1284 /* 1285 * We maintain a private dentry LRU. 1286 * 1287 * FIXME: this needs to be changed to a per-mds lru to be useful. 
1288 */ 1289 void ceph_dentry_lru_add(struct dentry *dn) 1290 { 1291 struct ceph_dentry_info *di = ceph_dentry(dn); 1292 struct ceph_mds_client *mdsc; 1293 1294 dout("dentry_lru_add %p %p '%.*s'\n", di, dn, 1295 dn->d_name.len, dn->d_name.name); 1296 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1297 spin_lock(&mdsc->dentry_lru_lock); 1298 list_add_tail(&di->lru, &mdsc->dentry_lru); 1299 mdsc->num_dentry++; 1300 spin_unlock(&mdsc->dentry_lru_lock); 1301 } 1302 1303 void ceph_dentry_lru_touch(struct dentry *dn) 1304 { 1305 struct ceph_dentry_info *di = ceph_dentry(dn); 1306 struct ceph_mds_client *mdsc; 1307 1308 dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, 1309 dn->d_name.len, dn->d_name.name, di->offset); 1310 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1311 spin_lock(&mdsc->dentry_lru_lock); 1312 list_move_tail(&di->lru, &mdsc->dentry_lru); 1313 spin_unlock(&mdsc->dentry_lru_lock); 1314 } 1315 1316 void ceph_dentry_lru_del(struct dentry *dn) 1317 { 1318 struct ceph_dentry_info *di = ceph_dentry(dn); 1319 struct ceph_mds_client *mdsc; 1320 1321 dout("dentry_lru_del %p %p '%.*s'\n", di, dn, 1322 dn->d_name.len, dn->d_name.name); 1323 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1324 spin_lock(&mdsc->dentry_lru_lock); 1325 list_del_init(&di->lru); 1326 mdsc->num_dentry--; 1327 spin_unlock(&mdsc->dentry_lru_lock); 1328 } 1329 1330 /* 1331 * Return name hash for a given dentry. This is dependent on 1332 * the parent directory's hash function. 
1333 */ 1334 unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) 1335 { 1336 struct ceph_inode_info *dci = ceph_inode(dir); 1337 1338 switch (dci->i_dir_layout.dl_dir_hash) { 1339 case 0: /* for backward compat */ 1340 case CEPH_STR_HASH_LINUX: 1341 return dn->d_name.hash; 1342 1343 default: 1344 return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, 1345 dn->d_name.name, dn->d_name.len); 1346 } 1347 } 1348 1349 const struct file_operations ceph_dir_fops = { 1350 .read = ceph_read_dir, 1351 .readdir = ceph_readdir, 1352 .llseek = ceph_dir_llseek, 1353 .open = ceph_open, 1354 .release = ceph_release, 1355 .unlocked_ioctl = ceph_ioctl, 1356 .fsync = ceph_dir_fsync, 1357 }; 1358 1359 const struct inode_operations ceph_dir_iops = { 1360 .lookup = ceph_lookup, 1361 .permission = ceph_permission, 1362 .getattr = ceph_getattr, 1363 .setattr = ceph_setattr, 1364 .setxattr = ceph_setxattr, 1365 .getxattr = ceph_getxattr, 1366 .listxattr = ceph_listxattr, 1367 .removexattr = ceph_removexattr, 1368 .mknod = ceph_mknod, 1369 .symlink = ceph_symlink, 1370 .mkdir = ceph_mkdir, 1371 .link = ceph_link, 1372 .unlink = ceph_unlink, 1373 .rmdir = ceph_unlink, 1374 .rename = ceph_rename, 1375 .create = ceph_create, 1376 .atomic_open = ceph_atomic_open, 1377 }; 1378 1379 const struct dentry_operations ceph_dentry_ops = { 1380 .d_revalidate = ceph_d_revalidate, 1381 .d_release = ceph_d_release, 1382 .d_prune = ceph_d_prune, 1383 }; 1384 1385 const struct dentry_operations ceph_snapdir_dentry_ops = { 1386 .d_revalidate = ceph_snapdir_d_revalidate, 1387 .d_release = ceph_d_release, 1388 }; 1389 1390 const struct dentry_operations ceph_snap_dentry_ops = { 1391 .d_release = ceph_d_release, 1392 .d_prune = ceph_d_prune, 1393 }; 1394