13d14c5d2SYehuda Sadeh #include <linux/ceph/ceph_debug.h> 2355da1ebSSage Weil 3355da1ebSSage Weil #include <linux/module.h> 4355da1ebSSage Weil #include <linux/fs.h> 5355da1ebSSage Weil #include <linux/slab.h> 6355da1ebSSage Weil #include <linux/string.h> 7355da1ebSSage Weil #include <linux/uaccess.h> 8355da1ebSSage Weil #include <linux/kernel.h> 9355da1ebSSage Weil #include <linux/namei.h> 10355da1ebSSage Weil #include <linux/writeback.h> 11355da1ebSSage Weil #include <linux/vmalloc.h> 12355da1ebSSage Weil 13355da1ebSSage Weil #include "super.h" 143d14c5d2SYehuda Sadeh #include "mds_client.h" 1599ccbd22SMilosz Tanski #include "cache.h" 163d14c5d2SYehuda Sadeh #include <linux/ceph/decode.h> 17355da1ebSSage Weil 18355da1ebSSage Weil /* 19355da1ebSSage Weil * Ceph inode operations 20355da1ebSSage Weil * 21355da1ebSSage Weil * Implement basic inode helpers (get, alloc) and inode ops (getattr, 22355da1ebSSage Weil * setattr, etc.), xattr helpers, and helpers for assimilating 23355da1ebSSage Weil * metadata returned by the MDS into our cache. 24355da1ebSSage Weil * 25355da1ebSSage Weil * Also define helpers for doing asynchronous writeback, invalidation, 26355da1ebSSage Weil * and truncation for the benefit of those who can't afford to block 27355da1ebSSage Weil * (typically because they are in the message handler path). 
28355da1ebSSage Weil */ 29355da1ebSSage Weil 30355da1ebSSage Weil static const struct inode_operations ceph_symlink_iops; 31355da1ebSSage Weil 323c6f6b79SSage Weil static void ceph_invalidate_work(struct work_struct *work); 333c6f6b79SSage Weil static void ceph_writeback_work(struct work_struct *work); 343c6f6b79SSage Weil static void ceph_vmtruncate_work(struct work_struct *work); 35355da1ebSSage Weil 36355da1ebSSage Weil /* 37355da1ebSSage Weil * find or create an inode, given the ceph ino number 38355da1ebSSage Weil */ 39ad1fee96SYehuda Sadeh static int ceph_set_ino_cb(struct inode *inode, void *data) 40ad1fee96SYehuda Sadeh { 41ad1fee96SYehuda Sadeh ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; 42ad1fee96SYehuda Sadeh inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data); 43ad1fee96SYehuda Sadeh return 0; 44ad1fee96SYehuda Sadeh } 45ad1fee96SYehuda Sadeh 46355da1ebSSage Weil struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) 47355da1ebSSage Weil { 48355da1ebSSage Weil struct inode *inode; 49355da1ebSSage Weil ino_t t = ceph_vino_to_ino(vino); 50355da1ebSSage Weil 51355da1ebSSage Weil inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino); 52355da1ebSSage Weil if (inode == NULL) 53355da1ebSSage Weil return ERR_PTR(-ENOMEM); 54355da1ebSSage Weil if (inode->i_state & I_NEW) { 55355da1ebSSage Weil dout("get_inode created new inode %p %llx.%llx ino %llx\n", 56355da1ebSSage Weil inode, ceph_vinop(inode), (u64)inode->i_ino); 57355da1ebSSage Weil unlock_new_inode(inode); 58355da1ebSSage Weil } 59355da1ebSSage Weil 60355da1ebSSage Weil dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino, 61355da1ebSSage Weil vino.snap, inode); 62355da1ebSSage Weil return inode; 63355da1ebSSage Weil } 64355da1ebSSage Weil 656f60f889SYan, Zheng struct inode *ceph_lookup_inode(struct super_block *sb, struct ceph_vino vino) 666f60f889SYan, Zheng { 676f60f889SYan, Zheng struct inode *inode; 686f60f889SYan, Zheng ino_t t = 
ceph_vino_to_ino(vino); 696f60f889SYan, Zheng inode = ilookup5_nowait(sb, t, ceph_ino_compare, &vino); 706f60f889SYan, Zheng return inode; 716f60f889SYan, Zheng } 726f60f889SYan, Zheng 73355da1ebSSage Weil /* 74355da1ebSSage Weil * get/constuct snapdir inode for a given directory 75355da1ebSSage Weil */ 76355da1ebSSage Weil struct inode *ceph_get_snapdir(struct inode *parent) 77355da1ebSSage Weil { 78355da1ebSSage Weil struct ceph_vino vino = { 79355da1ebSSage Weil .ino = ceph_ino(parent), 80355da1ebSSage Weil .snap = CEPH_SNAPDIR, 81355da1ebSSage Weil }; 82355da1ebSSage Weil struct inode *inode = ceph_get_inode(parent->i_sb, vino); 83b377ff13SSage Weil struct ceph_inode_info *ci = ceph_inode(inode); 84355da1ebSSage Weil 85355da1ebSSage Weil BUG_ON(!S_ISDIR(parent->i_mode)); 86355da1ebSSage Weil if (IS_ERR(inode)) 877e34bc52SJulia Lawall return inode; 88355da1ebSSage Weil inode->i_mode = parent->i_mode; 89355da1ebSSage Weil inode->i_uid = parent->i_uid; 90355da1ebSSage Weil inode->i_gid = parent->i_gid; 91355da1ebSSage Weil inode->i_op = &ceph_dir_iops; 92355da1ebSSage Weil inode->i_fop = &ceph_dir_fops; 93b377ff13SSage Weil ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ 94b377ff13SSage Weil ci->i_rbytes = 0; 95355da1ebSSage Weil return inode; 96355da1ebSSage Weil } 97355da1ebSSage Weil 98355da1ebSSage Weil const struct inode_operations ceph_file_iops = { 99355da1ebSSage Weil .permission = ceph_permission, 100355da1ebSSage Weil .setattr = ceph_setattr, 101355da1ebSSage Weil .getattr = ceph_getattr, 102355da1ebSSage Weil .setxattr = ceph_setxattr, 103355da1ebSSage Weil .getxattr = ceph_getxattr, 104355da1ebSSage Weil .listxattr = ceph_listxattr, 105355da1ebSSage Weil .removexattr = ceph_removexattr, 106355da1ebSSage Weil }; 107355da1ebSSage Weil 108355da1ebSSage Weil 109355da1ebSSage Weil /* 110355da1ebSSage Weil * We use a 'frag tree' to keep track of the MDS's directory fragments 111355da1ebSSage Weil * for a given inode (usually there is just a single 
fragment). We 112355da1ebSSage Weil * need to know when a child frag is delegated to a new MDS, or when 113355da1ebSSage Weil * it is flagged as replicated, so we can direct our requests 114355da1ebSSage Weil * accordingly. 115355da1ebSSage Weil */ 116355da1ebSSage Weil 117355da1ebSSage Weil /* 118355da1ebSSage Weil * find/create a frag in the tree 119355da1ebSSage Weil */ 120355da1ebSSage Weil static struct ceph_inode_frag *__get_or_create_frag(struct ceph_inode_info *ci, 121355da1ebSSage Weil u32 f) 122355da1ebSSage Weil { 123355da1ebSSage Weil struct rb_node **p; 124355da1ebSSage Weil struct rb_node *parent = NULL; 125355da1ebSSage Weil struct ceph_inode_frag *frag; 126355da1ebSSage Weil int c; 127355da1ebSSage Weil 128355da1ebSSage Weil p = &ci->i_fragtree.rb_node; 129355da1ebSSage Weil while (*p) { 130355da1ebSSage Weil parent = *p; 131355da1ebSSage Weil frag = rb_entry(parent, struct ceph_inode_frag, node); 132355da1ebSSage Weil c = ceph_frag_compare(f, frag->frag); 133355da1ebSSage Weil if (c < 0) 134355da1ebSSage Weil p = &(*p)->rb_left; 135355da1ebSSage Weil else if (c > 0) 136355da1ebSSage Weil p = &(*p)->rb_right; 137355da1ebSSage Weil else 138355da1ebSSage Weil return frag; 139355da1ebSSage Weil } 140355da1ebSSage Weil 141355da1ebSSage Weil frag = kmalloc(sizeof(*frag), GFP_NOFS); 142355da1ebSSage Weil if (!frag) { 143355da1ebSSage Weil pr_err("__get_or_create_frag ENOMEM on %p %llx.%llx " 144355da1ebSSage Weil "frag %x\n", &ci->vfs_inode, 145355da1ebSSage Weil ceph_vinop(&ci->vfs_inode), f); 146355da1ebSSage Weil return ERR_PTR(-ENOMEM); 147355da1ebSSage Weil } 148355da1ebSSage Weil frag->frag = f; 149355da1ebSSage Weil frag->split_by = 0; 150355da1ebSSage Weil frag->mds = -1; 151355da1ebSSage Weil frag->ndist = 0; 152355da1ebSSage Weil 153355da1ebSSage Weil rb_link_node(&frag->node, parent, p); 154355da1ebSSage Weil rb_insert_color(&frag->node, &ci->i_fragtree); 155355da1ebSSage Weil 156355da1ebSSage Weil dout("get_or_create_frag added %llx.%llx frag 
%x\n", 157355da1ebSSage Weil ceph_vinop(&ci->vfs_inode), f); 158355da1ebSSage Weil return frag; 159355da1ebSSage Weil } 160355da1ebSSage Weil 161355da1ebSSage Weil /* 162355da1ebSSage Weil * find a specific frag @f 163355da1ebSSage Weil */ 164355da1ebSSage Weil struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f) 165355da1ebSSage Weil { 166355da1ebSSage Weil struct rb_node *n = ci->i_fragtree.rb_node; 167355da1ebSSage Weil 168355da1ebSSage Weil while (n) { 169355da1ebSSage Weil struct ceph_inode_frag *frag = 170355da1ebSSage Weil rb_entry(n, struct ceph_inode_frag, node); 171355da1ebSSage Weil int c = ceph_frag_compare(f, frag->frag); 172355da1ebSSage Weil if (c < 0) 173355da1ebSSage Weil n = n->rb_left; 174355da1ebSSage Weil else if (c > 0) 175355da1ebSSage Weil n = n->rb_right; 176355da1ebSSage Weil else 177355da1ebSSage Weil return frag; 178355da1ebSSage Weil } 179355da1ebSSage Weil return NULL; 180355da1ebSSage Weil } 181355da1ebSSage Weil 182355da1ebSSage Weil /* 183355da1ebSSage Weil * Choose frag containing the given value @v. If @pfrag is 184355da1ebSSage Weil * specified, copy the frag delegation info to the caller if 185355da1ebSSage Weil * it is present. 
186355da1ebSSage Weil */ 187355da1ebSSage Weil u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, 188355da1ebSSage Weil struct ceph_inode_frag *pfrag, 189355da1ebSSage Weil int *found) 190355da1ebSSage Weil { 191355da1ebSSage Weil u32 t = ceph_frag_make(0, 0); 192355da1ebSSage Weil struct ceph_inode_frag *frag; 193355da1ebSSage Weil unsigned nway, i; 194355da1ebSSage Weil u32 n; 195355da1ebSSage Weil 196355da1ebSSage Weil if (found) 197355da1ebSSage Weil *found = 0; 198355da1ebSSage Weil 199355da1ebSSage Weil mutex_lock(&ci->i_fragtree_mutex); 200355da1ebSSage Weil while (1) { 201355da1ebSSage Weil WARN_ON(!ceph_frag_contains_value(t, v)); 202355da1ebSSage Weil frag = __ceph_find_frag(ci, t); 203355da1ebSSage Weil if (!frag) 204355da1ebSSage Weil break; /* t is a leaf */ 205355da1ebSSage Weil if (frag->split_by == 0) { 206355da1ebSSage Weil if (pfrag) 207355da1ebSSage Weil memcpy(pfrag, frag, sizeof(*pfrag)); 208355da1ebSSage Weil if (found) 209355da1ebSSage Weil *found = 1; 210355da1ebSSage Weil break; 211355da1ebSSage Weil } 212355da1ebSSage Weil 213355da1ebSSage Weil /* choose child */ 214355da1ebSSage Weil nway = 1 << frag->split_by; 215355da1ebSSage Weil dout("choose_frag(%x) %x splits by %d (%d ways)\n", v, t, 216355da1ebSSage Weil frag->split_by, nway); 217355da1ebSSage Weil for (i = 0; i < nway; i++) { 218355da1ebSSage Weil n = ceph_frag_make_child(t, frag->split_by, i); 219355da1ebSSage Weil if (ceph_frag_contains_value(n, v)) { 220355da1ebSSage Weil t = n; 221355da1ebSSage Weil break; 222355da1ebSSage Weil } 223355da1ebSSage Weil } 224355da1ebSSage Weil BUG_ON(i == nway); 225355da1ebSSage Weil } 226355da1ebSSage Weil dout("choose_frag(%x) = %x\n", v, t); 227355da1ebSSage Weil 228355da1ebSSage Weil mutex_unlock(&ci->i_fragtree_mutex); 229355da1ebSSage Weil return t; 230355da1ebSSage Weil } 231355da1ebSSage Weil 232355da1ebSSage Weil /* 233355da1ebSSage Weil * Process dirfrag (delegation) info from the mds. 
Include leaf 234355da1ebSSage Weil * fragment in tree ONLY if ndist > 0. Otherwise, only 235355da1ebSSage Weil * branches/splits are included in i_fragtree) 236355da1ebSSage Weil */ 237355da1ebSSage Weil static int ceph_fill_dirfrag(struct inode *inode, 238355da1ebSSage Weil struct ceph_mds_reply_dirfrag *dirinfo) 239355da1ebSSage Weil { 240355da1ebSSage Weil struct ceph_inode_info *ci = ceph_inode(inode); 241355da1ebSSage Weil struct ceph_inode_frag *frag; 242355da1ebSSage Weil u32 id = le32_to_cpu(dirinfo->frag); 243355da1ebSSage Weil int mds = le32_to_cpu(dirinfo->auth); 244355da1ebSSage Weil int ndist = le32_to_cpu(dirinfo->ndist); 245355da1ebSSage Weil int i; 246355da1ebSSage Weil int err = 0; 247355da1ebSSage Weil 248355da1ebSSage Weil mutex_lock(&ci->i_fragtree_mutex); 249355da1ebSSage Weil if (ndist == 0) { 250355da1ebSSage Weil /* no delegation info needed. */ 251355da1ebSSage Weil frag = __ceph_find_frag(ci, id); 252355da1ebSSage Weil if (!frag) 253355da1ebSSage Weil goto out; 254355da1ebSSage Weil if (frag->split_by == 0) { 255355da1ebSSage Weil /* tree leaf, remove */ 256355da1ebSSage Weil dout("fill_dirfrag removed %llx.%llx frag %x" 257355da1ebSSage Weil " (no ref)\n", ceph_vinop(inode), id); 258355da1ebSSage Weil rb_erase(&frag->node, &ci->i_fragtree); 259355da1ebSSage Weil kfree(frag); 260355da1ebSSage Weil } else { 261355da1ebSSage Weil /* tree branch, keep and clear */ 262355da1ebSSage Weil dout("fill_dirfrag cleared %llx.%llx frag %x" 263355da1ebSSage Weil " referral\n", ceph_vinop(inode), id); 264355da1ebSSage Weil frag->mds = -1; 265355da1ebSSage Weil frag->ndist = 0; 266355da1ebSSage Weil } 267355da1ebSSage Weil goto out; 268355da1ebSSage Weil } 269355da1ebSSage Weil 270355da1ebSSage Weil 271355da1ebSSage Weil /* find/add this frag to store mds delegation info */ 272355da1ebSSage Weil frag = __get_or_create_frag(ci, id); 273355da1ebSSage Weil if (IS_ERR(frag)) { 274355da1ebSSage Weil /* this is not the end of the world; we can continue 
275355da1ebSSage Weil with bad/inaccurate delegation info */ 276355da1ebSSage Weil pr_err("fill_dirfrag ENOMEM on mds ref %llx.%llx fg %x\n", 277355da1ebSSage Weil ceph_vinop(inode), le32_to_cpu(dirinfo->frag)); 278355da1ebSSage Weil err = -ENOMEM; 279355da1ebSSage Weil goto out; 280355da1ebSSage Weil } 281355da1ebSSage Weil 282355da1ebSSage Weil frag->mds = mds; 283355da1ebSSage Weil frag->ndist = min_t(u32, ndist, CEPH_MAX_DIRFRAG_REP); 284355da1ebSSage Weil for (i = 0; i < frag->ndist; i++) 285355da1ebSSage Weil frag->dist[i] = le32_to_cpu(dirinfo->dist[i]); 286355da1ebSSage Weil dout("fill_dirfrag %llx.%llx frag %x ndist=%d\n", 287355da1ebSSage Weil ceph_vinop(inode), frag->frag, frag->ndist); 288355da1ebSSage Weil 289355da1ebSSage Weil out: 290355da1ebSSage Weil mutex_unlock(&ci->i_fragtree_mutex); 291355da1ebSSage Weil return err; 292355da1ebSSage Weil } 293355da1ebSSage Weil 294355da1ebSSage Weil 295355da1ebSSage Weil /* 296355da1ebSSage Weil * initialize a newly allocated inode. 
297355da1ebSSage Weil */ 298355da1ebSSage Weil struct inode *ceph_alloc_inode(struct super_block *sb) 299355da1ebSSage Weil { 300355da1ebSSage Weil struct ceph_inode_info *ci; 301355da1ebSSage Weil int i; 302355da1ebSSage Weil 303355da1ebSSage Weil ci = kmem_cache_alloc(ceph_inode_cachep, GFP_NOFS); 304355da1ebSSage Weil if (!ci) 305355da1ebSSage Weil return NULL; 306355da1ebSSage Weil 307355da1ebSSage Weil dout("alloc_inode %p\n", &ci->vfs_inode); 308355da1ebSSage Weil 309be655596SSage Weil spin_lock_init(&ci->i_ceph_lock); 310be655596SSage Weil 311355da1ebSSage Weil ci->i_version = 0; 312355da1ebSSage Weil ci->i_time_warp_seq = 0; 313355da1ebSSage Weil ci->i_ceph_flags = 0; 3142f276c51SYan, Zheng atomic_set(&ci->i_release_count, 1); 3152f276c51SYan, Zheng atomic_set(&ci->i_complete_count, 0); 316355da1ebSSage Weil ci->i_symlink = NULL; 317355da1ebSSage Weil 3186c0f3af7SSage Weil memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); 3196c0f3af7SSage Weil 320355da1ebSSage Weil ci->i_fragtree = RB_ROOT; 321355da1ebSSage Weil mutex_init(&ci->i_fragtree_mutex); 322355da1ebSSage Weil 323355da1ebSSage Weil ci->i_xattrs.blob = NULL; 324355da1ebSSage Weil ci->i_xattrs.prealloc_blob = NULL; 325355da1ebSSage Weil ci->i_xattrs.dirty = false; 326355da1ebSSage Weil ci->i_xattrs.index = RB_ROOT; 327355da1ebSSage Weil ci->i_xattrs.count = 0; 328355da1ebSSage Weil ci->i_xattrs.names_size = 0; 329355da1ebSSage Weil ci->i_xattrs.vals_size = 0; 330355da1ebSSage Weil ci->i_xattrs.version = 0; 331355da1ebSSage Weil ci->i_xattrs.index_version = 0; 332355da1ebSSage Weil 333355da1ebSSage Weil ci->i_caps = RB_ROOT; 334355da1ebSSage Weil ci->i_auth_cap = NULL; 335355da1ebSSage Weil ci->i_dirty_caps = 0; 336355da1ebSSage Weil ci->i_flushing_caps = 0; 337355da1ebSSage Weil INIT_LIST_HEAD(&ci->i_dirty_item); 338355da1ebSSage Weil INIT_LIST_HEAD(&ci->i_flushing_item); 339355da1ebSSage Weil ci->i_cap_flush_seq = 0; 340355da1ebSSage Weil ci->i_cap_flush_last_tid = 0; 341355da1ebSSage Weil 
memset(&ci->i_cap_flush_tid, 0, sizeof(ci->i_cap_flush_tid)); 342355da1ebSSage Weil init_waitqueue_head(&ci->i_cap_wq); 343355da1ebSSage Weil ci->i_hold_caps_min = 0; 344355da1ebSSage Weil ci->i_hold_caps_max = 0; 345355da1ebSSage Weil INIT_LIST_HEAD(&ci->i_cap_delay_list); 346355da1ebSSage Weil ci->i_cap_exporting_mds = 0; 347355da1ebSSage Weil ci->i_cap_exporting_mseq = 0; 348355da1ebSSage Weil ci->i_cap_exporting_issued = 0; 349355da1ebSSage Weil INIT_LIST_HEAD(&ci->i_cap_snaps); 350355da1ebSSage Weil ci->i_head_snapc = NULL; 351355da1ebSSage Weil ci->i_snap_caps = 0; 352355da1ebSSage Weil 353355da1ebSSage Weil for (i = 0; i < CEPH_FILE_MODE_NUM; i++) 354355da1ebSSage Weil ci->i_nr_by_mode[i] = 0; 355355da1ebSSage Weil 356b0d7c223SYan, Zheng mutex_init(&ci->i_truncate_mutex); 357355da1ebSSage Weil ci->i_truncate_seq = 0; 358355da1ebSSage Weil ci->i_truncate_size = 0; 359355da1ebSSage Weil ci->i_truncate_pending = 0; 360355da1ebSSage Weil 361355da1ebSSage Weil ci->i_max_size = 0; 362355da1ebSSage Weil ci->i_reported_size = 0; 363355da1ebSSage Weil ci->i_wanted_max_size = 0; 364355da1ebSSage Weil ci->i_requested_max_size = 0; 365355da1ebSSage Weil 366355da1ebSSage Weil ci->i_pin_ref = 0; 367355da1ebSSage Weil ci->i_rd_ref = 0; 368355da1ebSSage Weil ci->i_rdcache_ref = 0; 369355da1ebSSage Weil ci->i_wr_ref = 0; 370d3d0720dSHenry C Chang ci->i_wb_ref = 0; 371355da1ebSSage Weil ci->i_wrbuffer_ref = 0; 372355da1ebSSage Weil ci->i_wrbuffer_ref_head = 0; 373355da1ebSSage Weil ci->i_shared_gen = 0; 374355da1ebSSage Weil ci->i_rdcache_gen = 0; 375355da1ebSSage Weil ci->i_rdcache_revoking = 0; 376355da1ebSSage Weil 377355da1ebSSage Weil INIT_LIST_HEAD(&ci->i_unsafe_writes); 378355da1ebSSage Weil INIT_LIST_HEAD(&ci->i_unsafe_dirops); 379355da1ebSSage Weil spin_lock_init(&ci->i_unsafe_lock); 380355da1ebSSage Weil 381355da1ebSSage Weil ci->i_snap_realm = NULL; 382355da1ebSSage Weil INIT_LIST_HEAD(&ci->i_snap_realm_item); 383355da1ebSSage Weil 
INIT_LIST_HEAD(&ci->i_snap_flush_item); 384355da1ebSSage Weil 3853c6f6b79SSage Weil INIT_WORK(&ci->i_wb_work, ceph_writeback_work); 3863c6f6b79SSage Weil INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work); 387355da1ebSSage Weil 388355da1ebSSage Weil INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work); 389355da1ebSSage Weil 39099ccbd22SMilosz Tanski ceph_fscache_inode_init(ci); 39199ccbd22SMilosz Tanski 392355da1ebSSage Weil return &ci->vfs_inode; 393355da1ebSSage Weil } 394355da1ebSSage Weil 395fa0d7e3dSNick Piggin static void ceph_i_callback(struct rcu_head *head) 396fa0d7e3dSNick Piggin { 397fa0d7e3dSNick Piggin struct inode *inode = container_of(head, struct inode, i_rcu); 398fa0d7e3dSNick Piggin struct ceph_inode_info *ci = ceph_inode(inode); 399fa0d7e3dSNick Piggin 400fa0d7e3dSNick Piggin kmem_cache_free(ceph_inode_cachep, ci); 401fa0d7e3dSNick Piggin } 402fa0d7e3dSNick Piggin 403355da1ebSSage Weil void ceph_destroy_inode(struct inode *inode) 404355da1ebSSage Weil { 405355da1ebSSage Weil struct ceph_inode_info *ci = ceph_inode(inode); 406355da1ebSSage Weil struct ceph_inode_frag *frag; 407355da1ebSSage Weil struct rb_node *n; 408355da1ebSSage Weil 409355da1ebSSage Weil dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode)); 410355da1ebSSage Weil 41199ccbd22SMilosz Tanski ceph_fscache_unregister_inode_cookie(ci); 41299ccbd22SMilosz Tanski 413355da1ebSSage Weil ceph_queue_caps_release(inode); 414355da1ebSSage Weil 4158b218b8aSSage Weil /* 4168b218b8aSSage Weil * we may still have a snap_realm reference if there are stray 4178b218b8aSSage Weil * caps in i_cap_exporting_issued or i_snap_caps. 
4188b218b8aSSage Weil */ 4198b218b8aSSage Weil if (ci->i_snap_realm) { 4208b218b8aSSage Weil struct ceph_mds_client *mdsc = 4213d14c5d2SYehuda Sadeh ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; 4228b218b8aSSage Weil struct ceph_snap_realm *realm = ci->i_snap_realm; 4238b218b8aSSage Weil 4248b218b8aSSage Weil dout(" dropping residual ref to snap realm %p\n", realm); 4258b218b8aSSage Weil spin_lock(&realm->inodes_with_caps_lock); 4268b218b8aSSage Weil list_del_init(&ci->i_snap_realm_item); 4278b218b8aSSage Weil spin_unlock(&realm->inodes_with_caps_lock); 4288b218b8aSSage Weil ceph_put_snap_realm(mdsc, realm); 4298b218b8aSSage Weil } 4308b218b8aSSage Weil 431355da1ebSSage Weil kfree(ci->i_symlink); 432355da1ebSSage Weil while ((n = rb_first(&ci->i_fragtree)) != NULL) { 433355da1ebSSage Weil frag = rb_entry(n, struct ceph_inode_frag, node); 434355da1ebSSage Weil rb_erase(n, &ci->i_fragtree); 435355da1ebSSage Weil kfree(frag); 436355da1ebSSage Weil } 437355da1ebSSage Weil 438355da1ebSSage Weil __ceph_destroy_xattrs(ci); 439b6c1d5b8SSage Weil if (ci->i_xattrs.blob) 440355da1ebSSage Weil ceph_buffer_put(ci->i_xattrs.blob); 441b6c1d5b8SSage Weil if (ci->i_xattrs.prealloc_blob) 442355da1ebSSage Weil ceph_buffer_put(ci->i_xattrs.prealloc_blob); 443355da1ebSSage Weil 444fa0d7e3dSNick Piggin call_rcu(&inode->i_rcu, ceph_i_callback); 445355da1ebSSage Weil } 446355da1ebSSage Weil 447355da1ebSSage Weil /* 448355da1ebSSage Weil * Helpers to fill in size, ctime, mtime, and atime. We have to be 449355da1ebSSage Weil * careful because either the client or MDS may have more up to date 450355da1ebSSage Weil * info, depending on which capabilities are held, and whether 451355da1ebSSage Weil * time_warp_seq or truncate_seq have increased. (Ordinarily, mtime 452355da1ebSSage Weil * and size are monotonically increasing, except when utimes() or 453355da1ebSSage Weil * truncate() increments the corresponding _seq values.) 
454355da1ebSSage Weil */ 455355da1ebSSage Weil int ceph_fill_file_size(struct inode *inode, int issued, 456355da1ebSSage Weil u32 truncate_seq, u64 truncate_size, u64 size) 457355da1ebSSage Weil { 458355da1ebSSage Weil struct ceph_inode_info *ci = ceph_inode(inode); 459355da1ebSSage Weil int queue_trunc = 0; 460355da1ebSSage Weil 461355da1ebSSage Weil if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 || 462355da1ebSSage Weil (truncate_seq == ci->i_truncate_seq && size > inode->i_size)) { 463355da1ebSSage Weil dout("size %lld -> %llu\n", inode->i_size, size); 464355da1ebSSage Weil inode->i_size = size; 465355da1ebSSage Weil inode->i_blocks = (size + (1<<9) - 1) >> 9; 466355da1ebSSage Weil ci->i_reported_size = size; 467355da1ebSSage Weil if (truncate_seq != ci->i_truncate_seq) { 468355da1ebSSage Weil dout("truncate_seq %u -> %u\n", 469355da1ebSSage Weil ci->i_truncate_seq, truncate_seq); 470355da1ebSSage Weil ci->i_truncate_seq = truncate_seq; 471b0d7c223SYan, Zheng 472b0d7c223SYan, Zheng /* the MDS should have revoked these caps */ 473b0d7c223SYan, Zheng WARN_ON_ONCE(issued & (CEPH_CAP_FILE_EXCL | 474b0d7c223SYan, Zheng CEPH_CAP_FILE_RD | 475b0d7c223SYan, Zheng CEPH_CAP_FILE_WR | 476b0d7c223SYan, Zheng CEPH_CAP_FILE_LAZYIO)); 4773d497d85SYehuda Sadeh /* 4783d497d85SYehuda Sadeh * If we hold relevant caps, or in the case where we're 4793d497d85SYehuda Sadeh * not the only client referencing this file and we 4803d497d85SYehuda Sadeh * don't hold those caps, then we need to check whether 4813d497d85SYehuda Sadeh * the file is either opened or mmaped 4823d497d85SYehuda Sadeh */ 483b0d7c223SYan, Zheng if ((issued & (CEPH_CAP_FILE_CACHE| 484b0d7c223SYan, Zheng CEPH_CAP_FILE_BUFFER)) || 4853d497d85SYehuda Sadeh mapping_mapped(inode->i_mapping) || 4863d497d85SYehuda Sadeh __ceph_caps_file_wanted(ci)) { 487355da1ebSSage Weil ci->i_truncate_pending++; 488355da1ebSSage Weil queue_trunc = 1; 489355da1ebSSage Weil } 490355da1ebSSage Weil } 491355da1ebSSage Weil } 
492355da1ebSSage Weil if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0 && 493355da1ebSSage Weil ci->i_truncate_size != truncate_size) { 494355da1ebSSage Weil dout("truncate_size %lld -> %llu\n", ci->i_truncate_size, 495355da1ebSSage Weil truncate_size); 496355da1ebSSage Weil ci->i_truncate_size = truncate_size; 497355da1ebSSage Weil } 49899ccbd22SMilosz Tanski 49999ccbd22SMilosz Tanski if (queue_trunc) 50099ccbd22SMilosz Tanski ceph_fscache_invalidate(inode); 50199ccbd22SMilosz Tanski 502355da1ebSSage Weil return queue_trunc; 503355da1ebSSage Weil } 504355da1ebSSage Weil 505355da1ebSSage Weil void ceph_fill_file_time(struct inode *inode, int issued, 506355da1ebSSage Weil u64 time_warp_seq, struct timespec *ctime, 507355da1ebSSage Weil struct timespec *mtime, struct timespec *atime) 508355da1ebSSage Weil { 509355da1ebSSage Weil struct ceph_inode_info *ci = ceph_inode(inode); 510355da1ebSSage Weil int warn = 0; 511355da1ebSSage Weil 512355da1ebSSage Weil if (issued & (CEPH_CAP_FILE_EXCL| 513355da1ebSSage Weil CEPH_CAP_FILE_WR| 514d8672d64SSage Weil CEPH_CAP_FILE_BUFFER| 515d8672d64SSage Weil CEPH_CAP_AUTH_EXCL| 516d8672d64SSage Weil CEPH_CAP_XATTR_EXCL)) { 517355da1ebSSage Weil if (timespec_compare(ctime, &inode->i_ctime) > 0) { 518355da1ebSSage Weil dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", 519355da1ebSSage Weil inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, 520355da1ebSSage Weil ctime->tv_sec, ctime->tv_nsec); 521355da1ebSSage Weil inode->i_ctime = *ctime; 522355da1ebSSage Weil } 523355da1ebSSage Weil if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { 524355da1ebSSage Weil /* the MDS did a utimes() */ 525355da1ebSSage Weil dout("mtime %ld.%09ld -> %ld.%09ld " 526355da1ebSSage Weil "tw %d -> %d\n", 527355da1ebSSage Weil inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, 528355da1ebSSage Weil mtime->tv_sec, mtime->tv_nsec, 529355da1ebSSage Weil ci->i_time_warp_seq, (int)time_warp_seq); 530355da1ebSSage Weil 531355da1ebSSage Weil inode->i_mtime = 
*mtime; 532355da1ebSSage Weil inode->i_atime = *atime; 533355da1ebSSage Weil ci->i_time_warp_seq = time_warp_seq; 534355da1ebSSage Weil } else if (time_warp_seq == ci->i_time_warp_seq) { 535355da1ebSSage Weil /* nobody did utimes(); take the max */ 536355da1ebSSage Weil if (timespec_compare(mtime, &inode->i_mtime) > 0) { 537355da1ebSSage Weil dout("mtime %ld.%09ld -> %ld.%09ld inc\n", 538355da1ebSSage Weil inode->i_mtime.tv_sec, 539355da1ebSSage Weil inode->i_mtime.tv_nsec, 540355da1ebSSage Weil mtime->tv_sec, mtime->tv_nsec); 541355da1ebSSage Weil inode->i_mtime = *mtime; 542355da1ebSSage Weil } 543355da1ebSSage Weil if (timespec_compare(atime, &inode->i_atime) > 0) { 544355da1ebSSage Weil dout("atime %ld.%09ld -> %ld.%09ld inc\n", 545355da1ebSSage Weil inode->i_atime.tv_sec, 546355da1ebSSage Weil inode->i_atime.tv_nsec, 547355da1ebSSage Weil atime->tv_sec, atime->tv_nsec); 548355da1ebSSage Weil inode->i_atime = *atime; 549355da1ebSSage Weil } 550355da1ebSSage Weil } else if (issued & CEPH_CAP_FILE_EXCL) { 551355da1ebSSage Weil /* we did a utimes(); ignore mds values */ 552355da1ebSSage Weil } else { 553355da1ebSSage Weil warn = 1; 554355da1ebSSage Weil } 555355da1ebSSage Weil } else { 556d8672d64SSage Weil /* we have no write|excl caps; whatever the MDS says is true */ 557355da1ebSSage Weil if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { 558355da1ebSSage Weil inode->i_ctime = *ctime; 559355da1ebSSage Weil inode->i_mtime = *mtime; 560355da1ebSSage Weil inode->i_atime = *atime; 561355da1ebSSage Weil ci->i_time_warp_seq = time_warp_seq; 562355da1ebSSage Weil } else { 563355da1ebSSage Weil warn = 1; 564355da1ebSSage Weil } 565355da1ebSSage Weil } 566355da1ebSSage Weil if (warn) /* time_warp_seq shouldn't go backwards */ 567355da1ebSSage Weil dout("%p mds time_warp_seq %llu < %u\n", 568355da1ebSSage Weil inode, time_warp_seq, ci->i_time_warp_seq); 569355da1ebSSage Weil } 570355da1ebSSage Weil 571355da1ebSSage Weil /* 572355da1ebSSage Weil * Populate an 
inode based on info from mds. May be called on new or 573355da1ebSSage Weil * existing inodes. 574355da1ebSSage Weil */ 575355da1ebSSage Weil static int fill_inode(struct inode *inode, 576355da1ebSSage Weil struct ceph_mds_reply_info_in *iinfo, 577355da1ebSSage Weil struct ceph_mds_reply_dirfrag *dirinfo, 578355da1ebSSage Weil struct ceph_mds_session *session, 579355da1ebSSage Weil unsigned long ttl_from, int cap_fmode, 580355da1ebSSage Weil struct ceph_cap_reservation *caps_reservation) 581355da1ebSSage Weil { 582355da1ebSSage Weil struct ceph_mds_reply_inode *info = iinfo->in; 583355da1ebSSage Weil struct ceph_inode_info *ci = ceph_inode(inode); 584355da1ebSSage Weil int i; 585dfabbed6SSage Weil int issued = 0, implemented; 586355da1ebSSage Weil struct timespec mtime, atime, ctime; 587355da1ebSSage Weil u32 nsplits; 588355da1ebSSage Weil struct ceph_buffer *xattr_blob = NULL; 589355da1ebSSage Weil int err = 0; 590355da1ebSSage Weil int queue_trunc = 0; 591355da1ebSSage Weil 592355da1ebSSage Weil dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", 593355da1ebSSage Weil inode, ceph_vinop(inode), le64_to_cpu(info->version), 594355da1ebSSage Weil ci->i_version); 595355da1ebSSage Weil 596355da1ebSSage Weil /* 597355da1ebSSage Weil * prealloc xattr data, if it looks like we'll need it. only 598355da1ebSSage Weil * if len > 4 (meaning there are actually xattrs; the first 4 599355da1ebSSage Weil * bytes are the xattr count). 
600355da1ebSSage Weil */ 601355da1ebSSage Weil if (iinfo->xattr_len > 4) { 602b6c1d5b8SSage Weil xattr_blob = ceph_buffer_new(iinfo->xattr_len, GFP_NOFS); 603355da1ebSSage Weil if (!xattr_blob) 604355da1ebSSage Weil pr_err("fill_inode ENOMEM xattr blob %d bytes\n", 605355da1ebSSage Weil iinfo->xattr_len); 606355da1ebSSage Weil } 607355da1ebSSage Weil 608be655596SSage Weil spin_lock(&ci->i_ceph_lock); 609355da1ebSSage Weil 610355da1ebSSage Weil /* 611355da1ebSSage Weil * provided version will be odd if inode value is projected, 6128bd59e01SSage Weil * even if stable. skip the update if we have newer stable 6138bd59e01SSage Weil * info (ours>=theirs, e.g. due to racing mds replies), unless 6148bd59e01SSage Weil * we are getting projected (unstable) info (in which case the 6158bd59e01SSage Weil * version is odd, and we want ours>theirs). 6168bd59e01SSage Weil * us them 6178bd59e01SSage Weil * 2 2 skip 6188bd59e01SSage Weil * 3 2 skip 6198bd59e01SSage Weil * 3 3 update 620355da1ebSSage Weil */ 621355da1ebSSage Weil if (le64_to_cpu(info->version) > 0 && 6228bd59e01SSage Weil (ci->i_version & ~1) >= le64_to_cpu(info->version)) 623355da1ebSSage Weil goto no_change; 624355da1ebSSage Weil 625355da1ebSSage Weil issued = __ceph_caps_issued(ci, &implemented); 626355da1ebSSage Weil issued |= implemented | __ceph_caps_dirty(ci); 627355da1ebSSage Weil 628355da1ebSSage Weil /* update inode */ 629355da1ebSSage Weil ci->i_version = le64_to_cpu(info->version); 630355da1ebSSage Weil inode->i_version++; 631355da1ebSSage Weil inode->i_rdev = le32_to_cpu(info->rdev); 632355da1ebSSage Weil 633355da1ebSSage Weil if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { 634355da1ebSSage Weil inode->i_mode = le32_to_cpu(info->mode); 635ab871b90SEric W. Biederman inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); 636ab871b90SEric W. 
Biederman inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); 637355da1ebSSage Weil dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, 638bd2bae6aSEric W. Biederman from_kuid(&init_user_ns, inode->i_uid), 639bd2bae6aSEric W. Biederman from_kgid(&init_user_ns, inode->i_gid)); 640355da1ebSSage Weil } 641355da1ebSSage Weil 642355da1ebSSage Weil if ((issued & CEPH_CAP_LINK_EXCL) == 0) 643bfe86848SMiklos Szeredi set_nlink(inode, le32_to_cpu(info->nlink)); 644355da1ebSSage Weil 645355da1ebSSage Weil /* be careful with mtime, atime, size */ 646355da1ebSSage Weil ceph_decode_timespec(&atime, &info->atime); 647355da1ebSSage Weil ceph_decode_timespec(&mtime, &info->mtime); 648355da1ebSSage Weil ceph_decode_timespec(&ctime, &info->ctime); 649355da1ebSSage Weil queue_trunc = ceph_fill_file_size(inode, issued, 650355da1ebSSage Weil le32_to_cpu(info->truncate_seq), 651355da1ebSSage Weil le64_to_cpu(info->truncate_size), 652355da1ebSSage Weil le64_to_cpu(info->size)); 653355da1ebSSage Weil ceph_fill_file_time(inode, issued, 654355da1ebSSage Weil le32_to_cpu(info->time_warp_seq), 655355da1ebSSage Weil &ctime, &mtime, &atime); 656355da1ebSSage Weil 657912a9b03SSage Weil /* only update max_size on auth cap */ 658912a9b03SSage Weil if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && 659912a9b03SSage Weil ci->i_max_size != le64_to_cpu(info->max_size)) { 660912a9b03SSage Weil dout("max_size %lld -> %llu\n", ci->i_max_size, 661912a9b03SSage Weil le64_to_cpu(info->max_size)); 662355da1ebSSage Weil ci->i_max_size = le64_to_cpu(info->max_size); 663912a9b03SSage Weil } 664912a9b03SSage Weil 665355da1ebSSage Weil ci->i_layout = info->layout; 666355da1ebSSage Weil inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 667355da1ebSSage Weil 668355da1ebSSage Weil /* xattrs */ 669355da1ebSSage Weil /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. 
*/ 670355da1ebSSage Weil if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && 671355da1ebSSage Weil le64_to_cpu(info->xattr_version) > ci->i_xattrs.version) { 672355da1ebSSage Weil if (ci->i_xattrs.blob) 673355da1ebSSage Weil ceph_buffer_put(ci->i_xattrs.blob); 674355da1ebSSage Weil ci->i_xattrs.blob = xattr_blob; 675355da1ebSSage Weil if (xattr_blob) 676355da1ebSSage Weil memcpy(ci->i_xattrs.blob->vec.iov_base, 677355da1ebSSage Weil iinfo->xattr_data, iinfo->xattr_len); 678355da1ebSSage Weil ci->i_xattrs.version = le64_to_cpu(info->xattr_version); 679a6424e48SSage Weil xattr_blob = NULL; 680355da1ebSSage Weil } 681355da1ebSSage Weil 682355da1ebSSage Weil inode->i_mapping->a_ops = &ceph_aops; 683355da1ebSSage Weil inode->i_mapping->backing_dev_info = 684640ef79dSCheng Renquan &ceph_sb_to_client(inode->i_sb)->backing_dev_info; 685355da1ebSSage Weil 686355da1ebSSage Weil switch (inode->i_mode & S_IFMT) { 687355da1ebSSage Weil case S_IFIFO: 688355da1ebSSage Weil case S_IFBLK: 689355da1ebSSage Weil case S_IFCHR: 690355da1ebSSage Weil case S_IFSOCK: 691355da1ebSSage Weil init_special_inode(inode, inode->i_mode, inode->i_rdev); 692355da1ebSSage Weil inode->i_op = &ceph_file_iops; 693355da1ebSSage Weil break; 694355da1ebSSage Weil case S_IFREG: 695355da1ebSSage Weil inode->i_op = &ceph_file_iops; 696355da1ebSSage Weil inode->i_fop = &ceph_file_fops; 697355da1ebSSage Weil break; 698355da1ebSSage Weil case S_IFLNK: 699355da1ebSSage Weil inode->i_op = &ceph_symlink_iops; 700355da1ebSSage Weil if (!ci->i_symlink) { 701810339ecSXi Wang u32 symlen = iinfo->symlink_len; 702355da1ebSSage Weil char *sym; 703355da1ebSSage Weil 704be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 705355da1ebSSage Weil 706810339ecSXi Wang err = -EINVAL; 707810339ecSXi Wang if (WARN_ON(symlen != inode->i_size)) 708810339ecSXi Wang goto out; 709810339ecSXi Wang 710355da1ebSSage Weil err = -ENOMEM; 711810339ecSXi Wang sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS); 712355da1ebSSage Weil if (!sym) 
713355da1ebSSage Weil goto out; 714355da1ebSSage Weil 715be655596SSage Weil spin_lock(&ci->i_ceph_lock); 716355da1ebSSage Weil if (!ci->i_symlink) 717355da1ebSSage Weil ci->i_symlink = sym; 718355da1ebSSage Weil else 719355da1ebSSage Weil kfree(sym); /* lost a race */ 720355da1ebSSage Weil } 721355da1ebSSage Weil break; 722355da1ebSSage Weil case S_IFDIR: 723355da1ebSSage Weil inode->i_op = &ceph_dir_iops; 724355da1ebSSage Weil inode->i_fop = &ceph_dir_fops; 725355da1ebSSage Weil 72614303d20SSage Weil ci->i_dir_layout = iinfo->dir_layout; 72714303d20SSage Weil 728355da1ebSSage Weil ci->i_files = le64_to_cpu(info->files); 729355da1ebSSage Weil ci->i_subdirs = le64_to_cpu(info->subdirs); 730355da1ebSSage Weil ci->i_rbytes = le64_to_cpu(info->rbytes); 731355da1ebSSage Weil ci->i_rfiles = le64_to_cpu(info->rfiles); 732355da1ebSSage Weil ci->i_rsubdirs = le64_to_cpu(info->rsubdirs); 733355da1ebSSage Weil ceph_decode_timespec(&ci->i_rctime, &info->rctime); 734355da1ebSSage Weil break; 735355da1ebSSage Weil default: 736355da1ebSSage Weil pr_err("fill_inode %llx.%llx BAD mode 0%o\n", 737355da1ebSSage Weil ceph_vinop(inode), inode->i_mode); 738355da1ebSSage Weil } 739355da1ebSSage Weil 740a8673d61SYan, Zheng /* set dir completion flag? 
*/ 741a8673d61SYan, Zheng if (S_ISDIR(inode->i_mode) && 742a8673d61SYan, Zheng ci->i_files == 0 && ci->i_subdirs == 0 && 743a8673d61SYan, Zheng ceph_snap(inode) == CEPH_NOSNAP && 744a8673d61SYan, Zheng (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && 745a8673d61SYan, Zheng (issued & CEPH_CAP_FILE_EXCL) == 0 && 7462f276c51SYan, Zheng !__ceph_dir_is_complete(ci)) { 747a8673d61SYan, Zheng dout(" marking %p complete (empty)\n", inode); 7482f276c51SYan, Zheng __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); 749a8673d61SYan, Zheng ci->i_max_offset = 2; 750a8673d61SYan, Zheng } 751355da1ebSSage Weil no_change: 752be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 753355da1ebSSage Weil 754355da1ebSSage Weil /* queue truncate if we saw i_size decrease */ 755355da1ebSSage Weil if (queue_trunc) 7563c6f6b79SSage Weil ceph_queue_vmtruncate(inode); 757355da1ebSSage Weil 758355da1ebSSage Weil /* populate frag tree */ 759355da1ebSSage Weil /* FIXME: move me up, if/when version reflects fragtree changes */ 760355da1ebSSage Weil nsplits = le32_to_cpu(info->fragtree.nsplits); 761355da1ebSSage Weil mutex_lock(&ci->i_fragtree_mutex); 762355da1ebSSage Weil for (i = 0; i < nsplits; i++) { 763355da1ebSSage Weil u32 id = le32_to_cpu(info->fragtree.splits[i].frag); 764355da1ebSSage Weil struct ceph_inode_frag *frag = __get_or_create_frag(ci, id); 765355da1ebSSage Weil 766355da1ebSSage Weil if (IS_ERR(frag)) 767355da1ebSSage Weil continue; 768355da1ebSSage Weil frag->split_by = le32_to_cpu(info->fragtree.splits[i].by); 769355da1ebSSage Weil dout(" frag %x split by %d\n", frag->frag, frag->split_by); 770355da1ebSSage Weil } 771355da1ebSSage Weil mutex_unlock(&ci->i_fragtree_mutex); 772355da1ebSSage Weil 773355da1ebSSage Weil /* were we issued a capability? 
*/ 774355da1ebSSage Weil if (info->cap.caps) { 775355da1ebSSage Weil if (ceph_snap(inode) == CEPH_NOSNAP) { 776355da1ebSSage Weil ceph_add_cap(inode, session, 777355da1ebSSage Weil le64_to_cpu(info->cap.cap_id), 778355da1ebSSage Weil cap_fmode, 779355da1ebSSage Weil le32_to_cpu(info->cap.caps), 780355da1ebSSage Weil le32_to_cpu(info->cap.wanted), 781355da1ebSSage Weil le32_to_cpu(info->cap.seq), 782355da1ebSSage Weil le32_to_cpu(info->cap.mseq), 783355da1ebSSage Weil le64_to_cpu(info->cap.realm), 784355da1ebSSage Weil info->cap.flags, 785355da1ebSSage Weil caps_reservation); 786355da1ebSSage Weil } else { 787be655596SSage Weil spin_lock(&ci->i_ceph_lock); 788355da1ebSSage Weil dout(" %p got snap_caps %s\n", inode, 789355da1ebSSage Weil ceph_cap_string(le32_to_cpu(info->cap.caps))); 790355da1ebSSage Weil ci->i_snap_caps |= le32_to_cpu(info->cap.caps); 791355da1ebSSage Weil if (cap_fmode >= 0) 792355da1ebSSage Weil __ceph_get_fmode(ci, cap_fmode); 793be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 794355da1ebSSage Weil } 79504d000ebSSage Weil } else if (cap_fmode >= 0) { 79604d000ebSSage Weil pr_warning("mds issued no caps on %llx.%llx\n", 79704d000ebSSage Weil ceph_vinop(inode)); 79804d000ebSSage Weil __ceph_get_fmode(ci, cap_fmode); 799355da1ebSSage Weil } 800355da1ebSSage Weil 801355da1ebSSage Weil /* update delegation info? */ 802355da1ebSSage Weil if (dirinfo) 803355da1ebSSage Weil ceph_fill_dirfrag(inode, dirinfo); 804355da1ebSSage Weil 805355da1ebSSage Weil err = 0; 806355da1ebSSage Weil 807355da1ebSSage Weil out: 808b6c1d5b8SSage Weil if (xattr_blob) 809355da1ebSSage Weil ceph_buffer_put(xattr_blob); 810355da1ebSSage Weil return err; 811355da1ebSSage Weil } 812355da1ebSSage Weil 813355da1ebSSage Weil /* 814355da1ebSSage Weil * caller should hold session s_mutex. 
815355da1ebSSage Weil */ 816355da1ebSSage Weil static void update_dentry_lease(struct dentry *dentry, 817355da1ebSSage Weil struct ceph_mds_reply_lease *lease, 818355da1ebSSage Weil struct ceph_mds_session *session, 819355da1ebSSage Weil unsigned long from_time) 820355da1ebSSage Weil { 821355da1ebSSage Weil struct ceph_dentry_info *di = ceph_dentry(dentry); 822355da1ebSSage Weil long unsigned duration = le32_to_cpu(lease->duration_ms); 823355da1ebSSage Weil long unsigned ttl = from_time + (duration * HZ) / 1000; 824355da1ebSSage Weil long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; 825355da1ebSSage Weil struct inode *dir; 826355da1ebSSage Weil 827355da1ebSSage Weil /* only track leases on regular dentries */ 828355da1ebSSage Weil if (dentry->d_op != &ceph_dentry_ops) 829355da1ebSSage Weil return; 830355da1ebSSage Weil 831355da1ebSSage Weil spin_lock(&dentry->d_lock); 8322f90b852SSage Weil dout("update_dentry_lease %p duration %lu ms ttl %lu\n", 8332f90b852SSage Weil dentry, duration, ttl); 834355da1ebSSage Weil 835355da1ebSSage Weil /* make lease_rdcache_gen match directory */ 836355da1ebSSage Weil dir = dentry->d_parent->d_inode; 837355da1ebSSage Weil di->lease_shared_gen = ceph_inode(dir)->i_shared_gen; 838355da1ebSSage Weil 8392f90b852SSage Weil if (duration == 0) 840355da1ebSSage Weil goto out_unlock; 841355da1ebSSage Weil 842355da1ebSSage Weil if (di->lease_gen == session->s_cap_gen && 843355da1ebSSage Weil time_before(ttl, dentry->d_time)) 844355da1ebSSage Weil goto out_unlock; /* we already have a newer lease. 
*/ 845355da1ebSSage Weil 846355da1ebSSage Weil if (di->lease_session && di->lease_session != session) 847355da1ebSSage Weil goto out_unlock; 848355da1ebSSage Weil 849355da1ebSSage Weil ceph_dentry_lru_touch(dentry); 850355da1ebSSage Weil 851355da1ebSSage Weil if (!di->lease_session) 852355da1ebSSage Weil di->lease_session = ceph_get_mds_session(session); 853355da1ebSSage Weil di->lease_gen = session->s_cap_gen; 854355da1ebSSage Weil di->lease_seq = le32_to_cpu(lease->seq); 855355da1ebSSage Weil di->lease_renew_after = half_ttl; 856355da1ebSSage Weil di->lease_renew_from = 0; 857355da1ebSSage Weil dentry->d_time = ttl; 858355da1ebSSage Weil out_unlock: 859355da1ebSSage Weil spin_unlock(&dentry->d_lock); 860355da1ebSSage Weil return; 861355da1ebSSage Weil } 862355da1ebSSage Weil 863355da1ebSSage Weil /* 8644baa75efSYehuda Sadeh * Set dentry's directory position based on the current dir's max, and 8654baa75efSYehuda Sadeh * order it in d_subdirs, so that dcache_readdir behaves. 8664f177264SSage Weil * 8674f177264SSage Weil * Always called under directory's i_mutex. 
8684baa75efSYehuda Sadeh */ 8694baa75efSYehuda Sadeh static void ceph_set_dentry_offset(struct dentry *dn) 8704baa75efSYehuda Sadeh { 8714baa75efSYehuda Sadeh struct dentry *dir = dn->d_parent; 8724f177264SSage Weil struct inode *inode = dir->d_inode; 873b8cd952bSYehuda Sadeh struct ceph_inode_info *ci; 8744baa75efSYehuda Sadeh struct ceph_dentry_info *di; 8754baa75efSYehuda Sadeh 8764baa75efSYehuda Sadeh BUG_ON(!inode); 8774baa75efSYehuda Sadeh 878b8cd952bSYehuda Sadeh ci = ceph_inode(inode); 8794baa75efSYehuda Sadeh di = ceph_dentry(dn); 8804baa75efSYehuda Sadeh 881be655596SSage Weil spin_lock(&ci->i_ceph_lock); 8822f276c51SYan, Zheng if (!__ceph_dir_is_complete(ci)) { 883be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 884e8a74987SSage Weil return; 885e8a74987SSage Weil } 8864baa75efSYehuda Sadeh di->offset = ceph_inode(inode)->i_max_offset++; 887be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 8884baa75efSYehuda Sadeh 8892fd6b7f5SNick Piggin spin_lock(&dir->d_lock); 8902fd6b7f5SNick Piggin spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); 89113a4214cSHenry C Chang list_move(&dn->d_u.d_child, &dir->d_subdirs); 8924baa75efSYehuda Sadeh dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, 8934baa75efSYehuda Sadeh dn->d_u.d_child.prev, dn->d_u.d_child.next); 8944baa75efSYehuda Sadeh spin_unlock(&dn->d_lock); 8952fd6b7f5SNick Piggin spin_unlock(&dir->d_lock); 8964baa75efSYehuda Sadeh } 8974baa75efSYehuda Sadeh 8984baa75efSYehuda Sadeh /* 8991cd3935bSSage Weil * splice a dentry to an inode. 9001cd3935bSSage Weil * caller must hold directory i_mutex for this to be safe. 9011cd3935bSSage Weil * 9021cd3935bSSage Weil * we will only rehash the resulting dentry if @prehash is 9031cd3935bSSage Weil * true; @prehash will be set to false (for the benefit of 9041cd3935bSSage Weil * the caller) if we fail. 
9051cd3935bSSage Weil */ 9061cd3935bSSage Weil static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, 907467c5251SSage Weil bool *prehash, bool set_offset) 9081cd3935bSSage Weil { 9091cd3935bSSage Weil struct dentry *realdn; 9101cd3935bSSage Weil 9111cd3935bSSage Weil BUG_ON(dn->d_inode); 9121cd3935bSSage Weil 9131cd3935bSSage Weil /* dn must be unhashed */ 9141cd3935bSSage Weil if (!d_unhashed(dn)) 9151cd3935bSSage Weil d_drop(dn); 9161cd3935bSSage Weil realdn = d_materialise_unique(dn, in); 9171cd3935bSSage Weil if (IS_ERR(realdn)) { 918d69ed05aSSage Weil pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", 919d69ed05aSSage Weil PTR_ERR(realdn), dn, in, ceph_vinop(in)); 9201cd3935bSSage Weil if (prehash) 9211cd3935bSSage Weil *prehash = false; /* don't rehash on error */ 9221cd3935bSSage Weil dn = realdn; /* note realdn contains the error */ 9231cd3935bSSage Weil goto out; 9241cd3935bSSage Weil } else if (realdn) { 9251cd3935bSSage Weil dout("dn %p (%d) spliced with %p (%d) " 9261cd3935bSSage Weil "inode %p ino %llx.%llx\n", 92784d08fa8SAl Viro dn, d_count(dn), 92884d08fa8SAl Viro realdn, d_count(realdn), 9291cd3935bSSage Weil realdn->d_inode, ceph_vinop(realdn->d_inode)); 9301cd3935bSSage Weil dput(dn); 9311cd3935bSSage Weil dn = realdn; 9321cd3935bSSage Weil } else { 9331cd3935bSSage Weil BUG_ON(!ceph_dentry(dn)); 9341cd3935bSSage Weil dout("dn %p attached to %p ino %llx.%llx\n", 9351cd3935bSSage Weil dn, dn->d_inode, ceph_vinop(dn->d_inode)); 9361cd3935bSSage Weil } 9371cd3935bSSage Weil if ((!prehash || *prehash) && d_unhashed(dn)) 9381cd3935bSSage Weil d_rehash(dn); 939467c5251SSage Weil if (set_offset) 9401cd3935bSSage Weil ceph_set_dentry_offset(dn); 9411cd3935bSSage Weil out: 9421cd3935bSSage Weil return dn; 9431cd3935bSSage Weil } 9441cd3935bSSage Weil 9451cd3935bSSage Weil /* 946355da1ebSSage Weil * Incorporate results into the local cache. 
 * This is either just
 * one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
 * after a lookup).
 *
 * A reply may contain
 *         a directory inode along with a dentry.
 *  and/or a target inode
 *
 * Called with snap_rwsem (read).
 *
 * @sb:      super block used to look up / create inodes for the reply
 * @req:     the MDS request; its parsed reply (r_reply_info) is consumed,
 *           and r_dentry / r_target_inode may be updated
 * @session: MDS session passed through to fill_inode() and
 *           update_dentry_lease()
 *
 * Returns 0 on success (including an empty reply) or a negative errno
 * from fill_inode(), ceph_get_inode() or splice_dentry().
 */
int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
		    struct ceph_mds_session *session)
{
	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
	struct inode *in = NULL;
	struct ceph_mds_reply_inode *ininfo;
	struct ceph_vino vino;
	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
	int i = 0;	/* only incremented below, never read in this function */
	int err = 0;

	dout("fill_trace %p is_dentry %d is_target %d\n", req,
	     rinfo->head->is_dentry, rinfo->head->is_target);

#if 0
	/*
	 * Debugging hook:
	 *
	 * If we resend completed ops to a recovering mds, we get no
	 * trace.  Since that is very rare, pretend this is the case
	 * to ensure the 'no trace' handlers in the callers behave.
	 *
	 * Fill in inodes unconditionally to avoid breaking cap
	 * invariants.
	 *
	 * NOTE(review): the fill_inode() calls in this disabled hook pass
	 * one argument fewer than the live calls further down (no
	 * caps_reservation), so it would need updating before it could
	 * be enabled.
	 */
	if (rinfo->head->op & CEPH_MDS_OP_WRITE) {
		pr_info("fill_trace faking empty trace on %lld %s\n",
			req->r_tid, ceph_mds_op_name(rinfo->head->op));
		if (rinfo->head->is_dentry) {
			rinfo->head->is_dentry = 0;
			err = fill_inode(req->r_locked_dir,
					 &rinfo->diri, rinfo->dirfrag,
					 session, req->r_request_started, -1);
		}
		if (rinfo->head->is_target) {
			rinfo->head->is_target = 0;
			ininfo = rinfo->targeti.in;
			vino.ino = le64_to_cpu(ininfo->ino);
			vino.snap = le64_to_cpu(ininfo->snapid);
			in = ceph_get_inode(sb, vino);
			err = fill_inode(in, &rinfo->targeti, NULL,
					 session, req->r_request_started,
					 req->r_fmode);
			iput(in);
		}
	}
#endif

	/*
	 * Empty reply: nothing to incorporate.  If the op succeeded and we
	 * have a locked directory, invalidate it via
	 * ceph_invalidate_dir_request().
	 */
	if (!rinfo->head->is_target && !rinfo->head->is_dentry) {
		dout("fill_trace reply is empty!\n");
		if (rinfo->head->result == 0 && req->r_locked_dir)
			ceph_invalidate_dir_request(req);
		return 0;
	}

	/* reply carries the parent directory inode: refresh it first */
	if (rinfo->head->is_dentry) {
		struct inode *dir = req->r_locked_dir;

		if (dir) {
			err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
					 session, req->r_request_started, -1,
					 &req->r_caps_reservation);
			if (err < 0)
				return err;
		} else {
			/* a dentry reply without a locked dir is unexpected */
			WARN_ON_ONCE(1);
		}
	}

	/*
	 * ignore null lease/binding on snapdir ENOENT, or else we
	 * will have trouble splicing in the virtual snapdir later
	 */
	if (rinfo->head->is_dentry && !req->r_aborted &&
	    req->r_locked_dir &&
	    (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
					       fsc->mount_options->snapdir_name,
					       req->r_dentry->d_name.len))) {
		/*
		 * lookup link rename   : null -> possibly existing inode
		 * mknod symlink mkdir  : null -> new inode
		 * unlink               : linked -> null
		 */
		struct inode *dir = req->r_locked_dir;
		struct dentry *dn = req->r_dentry;
		bool have_dir_cap, have_lease;

		/* the reply's directory must be the one the request locked */
		BUG_ON(!dn);
		BUG_ON(!dir);
		BUG_ON(dn->d_parent->d_inode != dir);
		BUG_ON(ceph_ino(dir) !=
		       le64_to_cpu(rinfo->diri.in->ino));
		BUG_ON(ceph_snap(dir) !=
		       le64_to_cpu(rinfo->diri.in->snapid));

		/* do we have a lease on the whole dir? */
		have_dir_cap =
			(le32_to_cpu(rinfo->diri.in->cap.caps) &
			 CEPH_CAP_FILE_SHARED);

		/* do we have a dn lease? */
		have_lease = have_dir_cap ||
			le32_to_cpu(rinfo->dlease->duration_ms);
		if (!have_lease)
			dout("fill_trace no dentry lease or dir cap\n");

		/* rename? */
		if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) {
			dout(" src %p '%.*s' dst %p '%.*s'\n",
			     req->r_old_dentry,
			     req->r_old_dentry->d_name.len,
			     req->r_old_dentry->d_name.name,
			     dn, dn->d_name.len, dn->d_name.name);
			dout("fill_trace doing d_move %p -> %p\n",
			     req->r_old_dentry, dn);

			d_move(req->r_old_dentry, dn);
			dout(" src %p '%.*s' dst %p '%.*s'\n",
			     req->r_old_dentry,
			     req->r_old_dentry->d_name.len,
			     req->r_old_dentry->d_name.name,
			     dn, dn->d_name.len, dn->d_name.name);

			/* ensure target dentry is invalidated, despite
			   rehashing bug in vfs_rename_dir */
			ceph_invalidate_dentry_lease(dn);

			/*
			 * d_move() puts the renamed dentry at the end of
			 * d_subdirs.  We need to assign it an appropriate
			 * directory offset so we can behave when dir is
			 * complete.
			 */
			ceph_set_dentry_offset(req->r_old_dentry);
			dout("dn %p gets new offset %lld\n", req->r_old_dentry,
			     ceph_dentry(req->r_old_dentry)->offset);

			dn = req->r_old_dentry;  /* use old_dentry */
			in = dn->d_inode;
		}

		/* null dentry? */
		if (!rinfo->head->is_target) {
			dout("fill_trace null dentry\n");
			if (dn->d_inode) {
				/* dentry was positive: drop its inode */
				dout("d_delete %p\n", dn);
				d_delete(dn);
			} else {
				/* make it an explicit negative dentry */
				dout("d_instantiate %p NULL\n", dn);
				d_instantiate(dn, NULL);
				if (have_lease && d_unhashed(dn))
					d_rehash(dn);
				update_dentry_lease(dn, rinfo->dlease,
						    session,
						    req->r_request_started);
			}
			goto done;
		}

		/* attach proper inode */
		ininfo = rinfo->targeti.in;
		vino.ino = le64_to_cpu(ininfo->ino);
		vino.snap = le64_to_cpu(ininfo->snapid);
		in = dn->d_inode;
		if (!in) {
			/* negative dentry: look up the inode and splice it in */
			in = ceph_get_inode(sb, vino);
			if (IS_ERR(in)) {
				pr_err("fill_trace bad get_inode "
				       "%llx.%llx\n", vino.ino, vino.snap);
				err = PTR_ERR(in);
				d_drop(dn);
				goto done;
			}
			dn = splice_dentry(dn, in, &have_lease, true);
			if (IS_ERR(dn)) {
				err = PTR_ERR(dn);
				goto done;
			}
			req->r_dentry = dn;  /* may have spliced */
			/* extra reference; `in` is stored in r_target_inode below */
			ihold(in);
		} else if (ceph_ino(in) == vino.ino &&
			   ceph_snap(in) == vino.snap) {
			/* dentry already points at the reply's target inode */
			ihold(in);
		} else {
			/*
			 * dentry is bound to some other inode: skip the lease
			 * update and let the target block below look up the
			 * right inode.
			 */
			dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
			     dn, in, ceph_ino(in), ceph_snap(in),
			     vino.ino, vino.snap);
			have_lease = false;
			in = NULL;
		}

		if (have_lease)
			update_dentry_lease(dn, rinfo->dlease, session,
					    req->r_request_started);
		dout(" final dn %p\n", dn);
		i++;
	} else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
		    req->r_op == CEPH_MDS_OP_MKSNAP) && !req->r_aborted) {
		struct dentry *dn = req->r_dentry;

		/* fill out a snapdir LOOKUPSNAP dentry */
		BUG_ON(!dn);
		BUG_ON(!req->r_locked_dir);
		BUG_ON(ceph_snap(req->r_locked_dir) != CEPH_SNAPDIR);
		ininfo = rinfo->targeti.in;
		vino.ino = le64_to_cpu(ininfo->ino);
		vino.snap = le64_to_cpu(ininfo->snapid);
		in = ceph_get_inode(sb, vino);
		if (IS_ERR(in)) {
			pr_err("fill_inode get_inode badness %llx.%llx\n",
			       vino.ino, vino.snap);
			err = PTR_ERR(in);
			d_delete(dn);
			goto done;
		}
		dout(" linking snapped dir %p to dn %p\n", in, dn);
		dn = splice_dentry(dn, in, NULL, true);
		if (IS_ERR(dn)) {
			err = PTR_ERR(dn);
			goto done;
		}
		req->r_dentry = dn;  /* may have spliced */
		ihold(in);
		rinfo->head->is_dentry = 1;  /* fool notrace handlers */
	}

	/* reply carries a target inode: find it if needed, then fill it in */
	if (rinfo->head->is_target) {
		vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
		vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);

		/* reuse `in` from above only when it is the same ino/snap */
		if (in == NULL || ceph_ino(in) != vino.ino ||
		    ceph_snap(in) != vino.snap) {
			in = ceph_get_inode(sb, vino);
			if (IS_ERR(in)) {
				err = PTR_ERR(in);
				goto done;
			}
		}
		req->r_target_inode = in;

		/* pass r_fmode only when the op succeeded, otherwise -1 */
		err = fill_inode(in,
				 &rinfo->targeti, NULL,
				 session, req->r_request_started,
				 (le32_to_cpu(rinfo->head->result) == 0) ?
				 req->r_fmode : -1,
				 &req->r_caps_reservation);
		if (err < 0) {
			pr_err("fill_inode badness %p %llx.%llx\n",
			       in, ceph_vinop(in));
			goto done;
		}
	}

done:
	dout("fill_trace done err=%d\n", err);
	return err;
}

/*
 * Prepopulate our cache with readdir results, leases, etc.
1216355da1ebSSage Weil */ 121779f9f99aSSage Weil static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, 121879f9f99aSSage Weil struct ceph_mds_session *session) 121979f9f99aSSage Weil { 122079f9f99aSSage Weil struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; 122179f9f99aSSage Weil int i, err = 0; 122279f9f99aSSage Weil 122379f9f99aSSage Weil for (i = 0; i < rinfo->dir_nr; i++) { 122479f9f99aSSage Weil struct ceph_vino vino; 122579f9f99aSSage Weil struct inode *in; 122679f9f99aSSage Weil int rc; 122779f9f99aSSage Weil 122879f9f99aSSage Weil vino.ino = le64_to_cpu(rinfo->dir_in[i].in->ino); 122979f9f99aSSage Weil vino.snap = le64_to_cpu(rinfo->dir_in[i].in->snapid); 123079f9f99aSSage Weil 123179f9f99aSSage Weil in = ceph_get_inode(req->r_dentry->d_sb, vino); 123279f9f99aSSage Weil if (IS_ERR(in)) { 123379f9f99aSSage Weil err = PTR_ERR(in); 123479f9f99aSSage Weil dout("new_inode badness got %d\n", err); 123579f9f99aSSage Weil continue; 123679f9f99aSSage Weil } 123779f9f99aSSage Weil rc = fill_inode(in, &rinfo->dir_in[i], NULL, session, 123879f9f99aSSage Weil req->r_request_started, -1, 123979f9f99aSSage Weil &req->r_caps_reservation); 124079f9f99aSSage Weil if (rc < 0) { 124179f9f99aSSage Weil pr_err("fill_inode badness on %p got %d\n", in, rc); 124279f9f99aSSage Weil err = rc; 124379f9f99aSSage Weil continue; 124479f9f99aSSage Weil } 124579f9f99aSSage Weil } 124679f9f99aSSage Weil 124779f9f99aSSage Weil return err; 124879f9f99aSSage Weil } 124979f9f99aSSage Weil 1250355da1ebSSage Weil int ceph_readdir_prepopulate(struct ceph_mds_request *req, 1251355da1ebSSage Weil struct ceph_mds_session *session) 1252355da1ebSSage Weil { 1253355da1ebSSage Weil struct dentry *parent = req->r_dentry; 1254355da1ebSSage Weil struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; 1255355da1ebSSage Weil struct qstr dname; 1256355da1ebSSage Weil struct dentry *dn; 1257355da1ebSSage Weil struct inode *in; 1258355da1ebSSage Weil int err = 0, i; 
1259355da1ebSSage Weil struct inode *snapdir = NULL; 1260355da1ebSSage Weil struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; 1261355da1ebSSage Weil u64 frag = le32_to_cpu(rhead->args.readdir.frag); 1262355da1ebSSage Weil struct ceph_dentry_info *di; 1263355da1ebSSage Weil 126479f9f99aSSage Weil if (req->r_aborted) 126579f9f99aSSage Weil return readdir_prepopulate_inodes_only(req, session); 126679f9f99aSSage Weil 1267355da1ebSSage Weil if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) { 1268355da1ebSSage Weil snapdir = ceph_get_snapdir(parent->d_inode); 1269355da1ebSSage Weil parent = d_find_alias(snapdir); 1270355da1ebSSage Weil dout("readdir_prepopulate %d items under SNAPDIR dn %p\n", 1271355da1ebSSage Weil rinfo->dir_nr, parent); 1272355da1ebSSage Weil } else { 1273355da1ebSSage Weil dout("readdir_prepopulate %d items under dn %p\n", 1274355da1ebSSage Weil rinfo->dir_nr, parent); 1275355da1ebSSage Weil if (rinfo->dir_dir) 1276355da1ebSSage Weil ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir); 1277355da1ebSSage Weil } 1278355da1ebSSage Weil 1279355da1ebSSage Weil for (i = 0; i < rinfo->dir_nr; i++) { 1280355da1ebSSage Weil struct ceph_vino vino; 1281355da1ebSSage Weil 1282355da1ebSSage Weil dname.name = rinfo->dir_dname[i]; 1283355da1ebSSage Weil dname.len = rinfo->dir_dname_len[i]; 1284355da1ebSSage Weil dname.hash = full_name_hash(dname.name, dname.len); 1285355da1ebSSage Weil 1286355da1ebSSage Weil vino.ino = le64_to_cpu(rinfo->dir_in[i].in->ino); 1287355da1ebSSage Weil vino.snap = le64_to_cpu(rinfo->dir_in[i].in->snapid); 1288355da1ebSSage Weil 1289355da1ebSSage Weil retry_lookup: 1290355da1ebSSage Weil dn = d_lookup(parent, &dname); 1291355da1ebSSage Weil dout("d_lookup on parent=%p name=%.*s got %p\n", 1292355da1ebSSage Weil parent, dname.len, dname.name, dn); 1293355da1ebSSage Weil 1294355da1ebSSage Weil if (!dn) { 1295355da1ebSSage Weil dn = d_alloc(parent, &dname); 1296355da1ebSSage Weil dout("d_alloc %p '%.*s' = %p\n", 
parent, 1297355da1ebSSage Weil dname.len, dname.name, dn); 1298355da1ebSSage Weil if (dn == NULL) { 1299355da1ebSSage Weil dout("d_alloc badness\n"); 1300355da1ebSSage Weil err = -ENOMEM; 1301355da1ebSSage Weil goto out; 1302355da1ebSSage Weil } 1303355da1ebSSage Weil err = ceph_init_dentry(dn); 13048c696737SSage Weil if (err < 0) { 13058c696737SSage Weil dput(dn); 1306355da1ebSSage Weil goto out; 13078c696737SSage Weil } 1308355da1ebSSage Weil } else if (dn->d_inode && 1309355da1ebSSage Weil (ceph_ino(dn->d_inode) != vino.ino || 1310355da1ebSSage Weil ceph_snap(dn->d_inode) != vino.snap)) { 1311355da1ebSSage Weil dout(" dn %p points to wrong inode %p\n", 1312355da1ebSSage Weil dn, dn->d_inode); 1313355da1ebSSage Weil d_delete(dn); 1314355da1ebSSage Weil dput(dn); 1315355da1ebSSage Weil goto retry_lookup; 1316355da1ebSSage Weil } else { 1317355da1ebSSage Weil /* reorder parent's d_subdirs */ 13182fd6b7f5SNick Piggin spin_lock(&parent->d_lock); 13192fd6b7f5SNick Piggin spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); 1320355da1ebSSage Weil list_move(&dn->d_u.d_child, &parent->d_subdirs); 1321355da1ebSSage Weil spin_unlock(&dn->d_lock); 13222fd6b7f5SNick Piggin spin_unlock(&parent->d_lock); 1323355da1ebSSage Weil } 1324355da1ebSSage Weil 1325355da1ebSSage Weil di = dn->d_fsdata; 1326355da1ebSSage Weil di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset); 1327355da1ebSSage Weil 1328355da1ebSSage Weil /* inode */ 1329355da1ebSSage Weil if (dn->d_inode) { 1330355da1ebSSage Weil in = dn->d_inode; 1331355da1ebSSage Weil } else { 1332355da1ebSSage Weil in = ceph_get_inode(parent->d_sb, vino); 1333ac1f12efSDan Carpenter if (IS_ERR(in)) { 1334355da1ebSSage Weil dout("new_inode badness\n"); 13352744c171SAl Viro d_drop(dn); 1336355da1ebSSage Weil dput(dn); 1337ac1f12efSDan Carpenter err = PTR_ERR(in); 1338355da1ebSSage Weil goto out; 1339355da1ebSSage Weil } 1340467c5251SSage Weil dn = splice_dentry(dn, in, NULL, false); 1341d69ed05aSSage Weil if (IS_ERR(dn)) 
1342d69ed05aSSage Weil dn = NULL; 1343355da1ebSSage Weil } 1344355da1ebSSage Weil 1345355da1ebSSage Weil if (fill_inode(in, &rinfo->dir_in[i], NULL, session, 1346355da1ebSSage Weil req->r_request_started, -1, 1347355da1ebSSage Weil &req->r_caps_reservation) < 0) { 1348355da1ebSSage Weil pr_err("fill_inode badness on %p\n", in); 1349d69ed05aSSage Weil goto next_item; 1350355da1ebSSage Weil } 1351d69ed05aSSage Weil if (dn) 1352355da1ebSSage Weil update_dentry_lease(dn, rinfo->dir_dlease[i], 1353d69ed05aSSage Weil req->r_session, 1354d69ed05aSSage Weil req->r_request_started); 1355d69ed05aSSage Weil next_item: 1356d69ed05aSSage Weil if (dn) 1357355da1ebSSage Weil dput(dn); 1358355da1ebSSage Weil } 1359355da1ebSSage Weil req->r_did_prepopulate = true; 1360355da1ebSSage Weil 1361355da1ebSSage Weil out: 1362355da1ebSSage Weil if (snapdir) { 1363355da1ebSSage Weil iput(snapdir); 1364355da1ebSSage Weil dput(parent); 1365355da1ebSSage Weil } 1366355da1ebSSage Weil dout("readdir_prepopulate done\n"); 1367355da1ebSSage Weil return err; 1368355da1ebSSage Weil } 1369355da1ebSSage Weil 1370355da1ebSSage Weil int ceph_inode_set_size(struct inode *inode, loff_t size) 1371355da1ebSSage Weil { 1372355da1ebSSage Weil struct ceph_inode_info *ci = ceph_inode(inode); 1373355da1ebSSage Weil int ret = 0; 1374355da1ebSSage Weil 1375be655596SSage Weil spin_lock(&ci->i_ceph_lock); 1376355da1ebSSage Weil dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size); 1377355da1ebSSage Weil inode->i_size = size; 1378355da1ebSSage Weil inode->i_blocks = (size + (1 << 9) - 1) >> 9; 1379355da1ebSSage Weil 1380355da1ebSSage Weil /* tell the MDS if we are approaching max_size */ 1381355da1ebSSage Weil if ((size << 1) >= ci->i_max_size && 1382355da1ebSSage Weil (ci->i_reported_size << 1) < ci->i_max_size) 1383355da1ebSSage Weil ret = 1; 1384355da1ebSSage Weil 1385be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 1386355da1ebSSage Weil return ret; 1387355da1ebSSage Weil } 1388355da1ebSSage Weil 
1389355da1ebSSage Weil /* 1390355da1ebSSage Weil * Write back inode data in a worker thread. (This can't be done 1391355da1ebSSage Weil * in the message handler context.) 1392355da1ebSSage Weil */ 13933c6f6b79SSage Weil void ceph_queue_writeback(struct inode *inode) 13943c6f6b79SSage Weil { 139515a2015fSSage Weil ihold(inode); 13963c6f6b79SSage Weil if (queue_work(ceph_inode_to_client(inode)->wb_wq, 13973c6f6b79SSage Weil &ceph_inode(inode)->i_wb_work)) { 13982c27c9a5SSage Weil dout("ceph_queue_writeback %p\n", inode); 13993c6f6b79SSage Weil } else { 14002c27c9a5SSage Weil dout("ceph_queue_writeback %p failed\n", inode); 140115a2015fSSage Weil iput(inode); 14023c6f6b79SSage Weil } 14033c6f6b79SSage Weil } 14043c6f6b79SSage Weil 14053c6f6b79SSage Weil static void ceph_writeback_work(struct work_struct *work) 1406355da1ebSSage Weil { 1407355da1ebSSage Weil struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, 1408355da1ebSSage Weil i_wb_work); 1409355da1ebSSage Weil struct inode *inode = &ci->vfs_inode; 1410355da1ebSSage Weil 1411355da1ebSSage Weil dout("writeback %p\n", inode); 1412355da1ebSSage Weil filemap_fdatawrite(&inode->i_data); 1413355da1ebSSage Weil iput(inode); 1414355da1ebSSage Weil } 1415355da1ebSSage Weil 1416355da1ebSSage Weil /* 14173c6f6b79SSage Weil * queue an async invalidation 14183c6f6b79SSage Weil */ 14193c6f6b79SSage Weil void ceph_queue_invalidate(struct inode *inode) 14203c6f6b79SSage Weil { 142115a2015fSSage Weil ihold(inode); 14223c6f6b79SSage Weil if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, 14233c6f6b79SSage Weil &ceph_inode(inode)->i_pg_inv_work)) { 14243c6f6b79SSage Weil dout("ceph_queue_invalidate %p\n", inode); 14253c6f6b79SSage Weil } else { 14263c6f6b79SSage Weil dout("ceph_queue_invalidate %p failed\n", inode); 142715a2015fSSage Weil iput(inode); 14283c6f6b79SSage Weil } 14293c6f6b79SSage Weil } 14303c6f6b79SSage Weil 14313c6f6b79SSage Weil /* 1432355da1ebSSage Weil * Invalidate inode pages in a worker 
thread. (This can't be done 1433355da1ebSSage Weil * in the message handler context.) 1434355da1ebSSage Weil */ 14353c6f6b79SSage Weil static void ceph_invalidate_work(struct work_struct *work) 1436355da1ebSSage Weil { 1437355da1ebSSage Weil struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, 1438355da1ebSSage Weil i_pg_inv_work); 1439355da1ebSSage Weil struct inode *inode = &ci->vfs_inode; 1440355da1ebSSage Weil u32 orig_gen; 1441355da1ebSSage Weil int check = 0; 1442355da1ebSSage Weil 1443b0d7c223SYan, Zheng mutex_lock(&ci->i_truncate_mutex); 1444be655596SSage Weil spin_lock(&ci->i_ceph_lock); 1445355da1ebSSage Weil dout("invalidate_pages %p gen %d revoking %d\n", inode, 1446355da1ebSSage Weil ci->i_rdcache_gen, ci->i_rdcache_revoking); 1447cd045cb4SSage Weil if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { 1448355da1ebSSage Weil /* nevermind! */ 1449be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 1450b0d7c223SYan, Zheng mutex_unlock(&ci->i_truncate_mutex); 1451355da1ebSSage Weil goto out; 1452355da1ebSSage Weil } 1453355da1ebSSage Weil orig_gen = ci->i_rdcache_gen; 1454be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 1455355da1ebSSage Weil 1456b0d7c223SYan, Zheng truncate_inode_pages(inode->i_mapping, 0); 1457355da1ebSSage Weil 1458be655596SSage Weil spin_lock(&ci->i_ceph_lock); 1459cd045cb4SSage Weil if (orig_gen == ci->i_rdcache_gen && 1460cd045cb4SSage Weil orig_gen == ci->i_rdcache_revoking) { 1461355da1ebSSage Weil dout("invalidate_pages %p gen %d successful\n", inode, 1462355da1ebSSage Weil ci->i_rdcache_gen); 1463cd045cb4SSage Weil ci->i_rdcache_revoking--; 1464355da1ebSSage Weil check = 1; 1465355da1ebSSage Weil } else { 1466cd045cb4SSage Weil dout("invalidate_pages %p gen %d raced, now %d revoking %d\n", 1467cd045cb4SSage Weil inode, orig_gen, ci->i_rdcache_gen, 1468cd045cb4SSage Weil ci->i_rdcache_revoking); 1469355da1ebSSage Weil } 1470be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 1471b0d7c223SYan, Zheng 
mutex_unlock(&ci->i_truncate_mutex); 1472355da1ebSSage Weil 1473355da1ebSSage Weil if (check) 1474355da1ebSSage Weil ceph_check_caps(ci, 0, NULL); 1475355da1ebSSage Weil out: 1476355da1ebSSage Weil iput(inode); 1477355da1ebSSage Weil } 1478355da1ebSSage Weil 1479355da1ebSSage Weil 1480355da1ebSSage Weil /* 14813f99969fSYan, Zheng * called by trunc_wq; 1482355da1ebSSage Weil * 1483355da1ebSSage Weil * We also truncate in a separate thread as well. 1484355da1ebSSage Weil */ 14853c6f6b79SSage Weil static void ceph_vmtruncate_work(struct work_struct *work) 1486355da1ebSSage Weil { 1487355da1ebSSage Weil struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, 1488355da1ebSSage Weil i_vmtruncate_work); 1489355da1ebSSage Weil struct inode *inode = &ci->vfs_inode; 1490355da1ebSSage Weil 1491355da1ebSSage Weil dout("vmtruncate_work %p\n", inode); 1492b415bf4fSYan, Zheng __ceph_do_pending_vmtruncate(inode); 1493355da1ebSSage Weil iput(inode); 1494355da1ebSSage Weil } 1495355da1ebSSage Weil 1496355da1ebSSage Weil /* 14973c6f6b79SSage Weil * Queue an async vmtruncate. If we fail to queue work, we will handle 14983c6f6b79SSage Weil * the truncation the next time we call __ceph_do_pending_vmtruncate. 
14993c6f6b79SSage Weil */ 15003c6f6b79SSage Weil void ceph_queue_vmtruncate(struct inode *inode) 15013c6f6b79SSage Weil { 15023c6f6b79SSage Weil struct ceph_inode_info *ci = ceph_inode(inode); 15033c6f6b79SSage Weil 150415a2015fSSage Weil ihold(inode); 150599ccbd22SMilosz Tanski 1506640ef79dSCheng Renquan if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, 15073c6f6b79SSage Weil &ci->i_vmtruncate_work)) { 15083c6f6b79SSage Weil dout("ceph_queue_vmtruncate %p\n", inode); 15093c6f6b79SSage Weil } else { 15103c6f6b79SSage Weil dout("ceph_queue_vmtruncate %p failed, pending=%d\n", 15113c6f6b79SSage Weil inode, ci->i_truncate_pending); 151215a2015fSSage Weil iput(inode); 15133c6f6b79SSage Weil } 15143c6f6b79SSage Weil } 15153c6f6b79SSage Weil 15163c6f6b79SSage Weil /* 1517355da1ebSSage Weil * Make sure any pending truncation is applied before doing anything 1518355da1ebSSage Weil * that may depend on it. 1519355da1ebSSage Weil */ 1520b415bf4fSYan, Zheng void __ceph_do_pending_vmtruncate(struct inode *inode) 1521355da1ebSSage Weil { 1522355da1ebSSage Weil struct ceph_inode_info *ci = ceph_inode(inode); 1523355da1ebSSage Weil u64 to; 1524a85f50b6SYan, Zheng int wrbuffer_refs, finish = 0; 1525355da1ebSSage Weil 1526b0d7c223SYan, Zheng mutex_lock(&ci->i_truncate_mutex); 1527355da1ebSSage Weil retry: 1528be655596SSage Weil spin_lock(&ci->i_ceph_lock); 1529355da1ebSSage Weil if (ci->i_truncate_pending == 0) { 1530355da1ebSSage Weil dout("__do_pending_vmtruncate %p none pending\n", inode); 1531be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 1532b0d7c223SYan, Zheng mutex_unlock(&ci->i_truncate_mutex); 1533355da1ebSSage Weil return; 1534355da1ebSSage Weil } 1535355da1ebSSage Weil 1536355da1ebSSage Weil /* 1537355da1ebSSage Weil * make sure any dirty snapped pages are flushed before we 1538355da1ebSSage Weil * possibly truncate them.. so write AND block! 
1539355da1ebSSage Weil */ 1540355da1ebSSage Weil if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) { 1541355da1ebSSage Weil dout("__do_pending_vmtruncate %p flushing snaps first\n", 1542355da1ebSSage Weil inode); 1543be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 1544355da1ebSSage Weil filemap_write_and_wait_range(&inode->i_data, 0, 1545355da1ebSSage Weil inode->i_sb->s_maxbytes); 1546355da1ebSSage Weil goto retry; 1547355da1ebSSage Weil } 1548355da1ebSSage Weil 1549b0d7c223SYan, Zheng /* there should be no reader or writer */ 1550b0d7c223SYan, Zheng WARN_ON_ONCE(ci->i_rd_ref || ci->i_wr_ref); 1551b0d7c223SYan, Zheng 1552355da1ebSSage Weil to = ci->i_truncate_size; 1553355da1ebSSage Weil wrbuffer_refs = ci->i_wrbuffer_ref; 1554355da1ebSSage Weil dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode, 1555355da1ebSSage Weil ci->i_truncate_pending, to); 1556be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 1557355da1ebSSage Weil 1558355da1ebSSage Weil truncate_inode_pages(inode->i_mapping, to); 1559355da1ebSSage Weil 1560be655596SSage Weil spin_lock(&ci->i_ceph_lock); 1561a85f50b6SYan, Zheng if (to == ci->i_truncate_size) { 1562a85f50b6SYan, Zheng ci->i_truncate_pending = 0; 1563a85f50b6SYan, Zheng finish = 1; 1564a85f50b6SYan, Zheng } 1565be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 1566a85f50b6SYan, Zheng if (!finish) 1567a85f50b6SYan, Zheng goto retry; 1568355da1ebSSage Weil 1569b0d7c223SYan, Zheng mutex_unlock(&ci->i_truncate_mutex); 1570b0d7c223SYan, Zheng 1571355da1ebSSage Weil if (wrbuffer_refs == 0) 1572355da1ebSSage Weil ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 1573a85f50b6SYan, Zheng 157403066f23SYehuda Sadeh wake_up_all(&ci->i_cap_wq); 1575355da1ebSSage Weil } 1576355da1ebSSage Weil 1577355da1ebSSage Weil /* 1578355da1ebSSage Weil * symlinks 1579355da1ebSSage Weil */ 1580355da1ebSSage Weil static void *ceph_sym_follow_link(struct dentry *dentry, struct nameidata *nd) 1581355da1ebSSage Weil { 1582355da1ebSSage Weil struct ceph_inode_info *ci 
= ceph_inode(dentry->d_inode); 1583355da1ebSSage Weil nd_set_link(nd, ci->i_symlink); 1584355da1ebSSage Weil return NULL; 1585355da1ebSSage Weil } 1586355da1ebSSage Weil 1587355da1ebSSage Weil static const struct inode_operations ceph_symlink_iops = { 1588355da1ebSSage Weil .readlink = generic_readlink, 1589355da1ebSSage Weil .follow_link = ceph_sym_follow_link, 15900b932672SYan, Zheng .setattr = ceph_setattr, 15910b932672SYan, Zheng .getattr = ceph_getattr, 15920b932672SYan, Zheng .setxattr = ceph_setxattr, 15930b932672SYan, Zheng .getxattr = ceph_getxattr, 15940b932672SYan, Zheng .listxattr = ceph_listxattr, 15950b932672SYan, Zheng .removexattr = ceph_removexattr, 1596355da1ebSSage Weil }; 1597355da1ebSSage Weil 1598355da1ebSSage Weil /* 1599355da1ebSSage Weil * setattr 1600355da1ebSSage Weil */ 1601355da1ebSSage Weil int ceph_setattr(struct dentry *dentry, struct iattr *attr) 1602355da1ebSSage Weil { 1603355da1ebSSage Weil struct inode *inode = dentry->d_inode; 1604355da1ebSSage Weil struct ceph_inode_info *ci = ceph_inode(inode); 16055f21c96dSSage Weil struct inode *parent_inode; 1606355da1ebSSage Weil const unsigned int ia_valid = attr->ia_valid; 1607355da1ebSSage Weil struct ceph_mds_request *req; 16083d14c5d2SYehuda Sadeh struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; 1609355da1ebSSage Weil int issued; 1610355da1ebSSage Weil int release = 0, dirtied = 0; 1611355da1ebSSage Weil int mask = 0; 1612355da1ebSSage Weil int err = 0; 1613fca65b4aSSage Weil int inode_dirty_flags = 0; 1614355da1ebSSage Weil 1615355da1ebSSage Weil if (ceph_snap(inode) != CEPH_NOSNAP) 1616355da1ebSSage Weil return -EROFS; 1617355da1ebSSage Weil 1618355da1ebSSage Weil err = inode_change_ok(inode, attr); 1619355da1ebSSage Weil if (err != 0) 1620355da1ebSSage Weil return err; 1621355da1ebSSage Weil 1622355da1ebSSage Weil req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR, 1623355da1ebSSage Weil USE_AUTH_MDS); 1624355da1ebSSage Weil if (IS_ERR(req)) 
1625355da1ebSSage Weil return PTR_ERR(req); 1626355da1ebSSage Weil 1627be655596SSage Weil spin_lock(&ci->i_ceph_lock); 1628355da1ebSSage Weil issued = __ceph_caps_issued(ci, NULL); 1629355da1ebSSage Weil dout("setattr %p issued %s\n", inode, ceph_cap_string(issued)); 1630355da1ebSSage Weil 1631355da1ebSSage Weil if (ia_valid & ATTR_UID) { 1632355da1ebSSage Weil dout("setattr %p uid %d -> %d\n", inode, 1633bd2bae6aSEric W. Biederman from_kuid(&init_user_ns, inode->i_uid), 1634bd2bae6aSEric W. Biederman from_kuid(&init_user_ns, attr->ia_uid)); 1635355da1ebSSage Weil if (issued & CEPH_CAP_AUTH_EXCL) { 1636355da1ebSSage Weil inode->i_uid = attr->ia_uid; 1637355da1ebSSage Weil dirtied |= CEPH_CAP_AUTH_EXCL; 1638355da1ebSSage Weil } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || 1639ab871b90SEric W. Biederman !uid_eq(attr->ia_uid, inode->i_uid)) { 1640ab871b90SEric W. Biederman req->r_args.setattr.uid = cpu_to_le32( 1641ab871b90SEric W. Biederman from_kuid(&init_user_ns, attr->ia_uid)); 1642355da1ebSSage Weil mask |= CEPH_SETATTR_UID; 1643355da1ebSSage Weil release |= CEPH_CAP_AUTH_SHARED; 1644355da1ebSSage Weil } 1645355da1ebSSage Weil } 1646355da1ebSSage Weil if (ia_valid & ATTR_GID) { 1647355da1ebSSage Weil dout("setattr %p gid %d -> %d\n", inode, 1648bd2bae6aSEric W. Biederman from_kgid(&init_user_ns, inode->i_gid), 1649bd2bae6aSEric W. Biederman from_kgid(&init_user_ns, attr->ia_gid)); 1650355da1ebSSage Weil if (issued & CEPH_CAP_AUTH_EXCL) { 1651355da1ebSSage Weil inode->i_gid = attr->ia_gid; 1652355da1ebSSage Weil dirtied |= CEPH_CAP_AUTH_EXCL; 1653355da1ebSSage Weil } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || 1654ab871b90SEric W. Biederman !gid_eq(attr->ia_gid, inode->i_gid)) { 1655ab871b90SEric W. Biederman req->r_args.setattr.gid = cpu_to_le32( 1656ab871b90SEric W. 
Biederman from_kgid(&init_user_ns, attr->ia_gid)); 1657355da1ebSSage Weil mask |= CEPH_SETATTR_GID; 1658355da1ebSSage Weil release |= CEPH_CAP_AUTH_SHARED; 1659355da1ebSSage Weil } 1660355da1ebSSage Weil } 1661355da1ebSSage Weil if (ia_valid & ATTR_MODE) { 1662355da1ebSSage Weil dout("setattr %p mode 0%o -> 0%o\n", inode, inode->i_mode, 1663355da1ebSSage Weil attr->ia_mode); 1664355da1ebSSage Weil if (issued & CEPH_CAP_AUTH_EXCL) { 1665355da1ebSSage Weil inode->i_mode = attr->ia_mode; 1666355da1ebSSage Weil dirtied |= CEPH_CAP_AUTH_EXCL; 1667355da1ebSSage Weil } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || 1668355da1ebSSage Weil attr->ia_mode != inode->i_mode) { 1669355da1ebSSage Weil req->r_args.setattr.mode = cpu_to_le32(attr->ia_mode); 1670355da1ebSSage Weil mask |= CEPH_SETATTR_MODE; 1671355da1ebSSage Weil release |= CEPH_CAP_AUTH_SHARED; 1672355da1ebSSage Weil } 1673355da1ebSSage Weil } 1674355da1ebSSage Weil 1675355da1ebSSage Weil if (ia_valid & ATTR_ATIME) { 1676355da1ebSSage Weil dout("setattr %p atime %ld.%ld -> %ld.%ld\n", inode, 1677355da1ebSSage Weil inode->i_atime.tv_sec, inode->i_atime.tv_nsec, 1678355da1ebSSage Weil attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); 1679355da1ebSSage Weil if (issued & CEPH_CAP_FILE_EXCL) { 1680355da1ebSSage Weil ci->i_time_warp_seq++; 1681355da1ebSSage Weil inode->i_atime = attr->ia_atime; 1682355da1ebSSage Weil dirtied |= CEPH_CAP_FILE_EXCL; 1683355da1ebSSage Weil } else if ((issued & CEPH_CAP_FILE_WR) && 1684355da1ebSSage Weil timespec_compare(&inode->i_atime, 1685355da1ebSSage Weil &attr->ia_atime) < 0) { 1686355da1ebSSage Weil inode->i_atime = attr->ia_atime; 1687355da1ebSSage Weil dirtied |= CEPH_CAP_FILE_WR; 1688355da1ebSSage Weil } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || 1689355da1ebSSage Weil !timespec_equal(&inode->i_atime, &attr->ia_atime)) { 1690355da1ebSSage Weil ceph_encode_timespec(&req->r_args.setattr.atime, 1691355da1ebSSage Weil &attr->ia_atime); 1692355da1ebSSage Weil mask |= 
CEPH_SETATTR_ATIME; 1693355da1ebSSage Weil release |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD | 1694355da1ebSSage Weil CEPH_CAP_FILE_WR; 1695355da1ebSSage Weil } 1696355da1ebSSage Weil } 1697355da1ebSSage Weil if (ia_valid & ATTR_MTIME) { 1698355da1ebSSage Weil dout("setattr %p mtime %ld.%ld -> %ld.%ld\n", inode, 1699355da1ebSSage Weil inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, 1700355da1ebSSage Weil attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); 1701355da1ebSSage Weil if (issued & CEPH_CAP_FILE_EXCL) { 1702355da1ebSSage Weil ci->i_time_warp_seq++; 1703355da1ebSSage Weil inode->i_mtime = attr->ia_mtime; 1704355da1ebSSage Weil dirtied |= CEPH_CAP_FILE_EXCL; 1705355da1ebSSage Weil } else if ((issued & CEPH_CAP_FILE_WR) && 1706355da1ebSSage Weil timespec_compare(&inode->i_mtime, 1707355da1ebSSage Weil &attr->ia_mtime) < 0) { 1708355da1ebSSage Weil inode->i_mtime = attr->ia_mtime; 1709355da1ebSSage Weil dirtied |= CEPH_CAP_FILE_WR; 1710355da1ebSSage Weil } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || 1711355da1ebSSage Weil !timespec_equal(&inode->i_mtime, &attr->ia_mtime)) { 1712355da1ebSSage Weil ceph_encode_timespec(&req->r_args.setattr.mtime, 1713355da1ebSSage Weil &attr->ia_mtime); 1714355da1ebSSage Weil mask |= CEPH_SETATTR_MTIME; 1715355da1ebSSage Weil release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | 1716355da1ebSSage Weil CEPH_CAP_FILE_WR; 1717355da1ebSSage Weil } 1718355da1ebSSage Weil } 1719355da1ebSSage Weil if (ia_valid & ATTR_SIZE) { 1720355da1ebSSage Weil dout("setattr %p size %lld -> %lld\n", inode, 1721355da1ebSSage Weil inode->i_size, attr->ia_size); 1722355da1ebSSage Weil if (attr->ia_size > inode->i_sb->s_maxbytes) { 1723355da1ebSSage Weil err = -EINVAL; 1724355da1ebSSage Weil goto out; 1725355da1ebSSage Weil } 1726355da1ebSSage Weil if ((issued & CEPH_CAP_FILE_EXCL) && 1727355da1ebSSage Weil attr->ia_size > inode->i_size) { 1728355da1ebSSage Weil inode->i_size = attr->ia_size; 1729355da1ebSSage Weil inode->i_blocks = 1730355da1ebSSage 
Weil (attr->ia_size + (1 << 9) - 1) >> 9; 1731355da1ebSSage Weil inode->i_ctime = attr->ia_ctime; 1732355da1ebSSage Weil ci->i_reported_size = attr->ia_size; 1733355da1ebSSage Weil dirtied |= CEPH_CAP_FILE_EXCL; 1734355da1ebSSage Weil } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || 1735355da1ebSSage Weil attr->ia_size != inode->i_size) { 1736355da1ebSSage Weil req->r_args.setattr.size = cpu_to_le64(attr->ia_size); 1737355da1ebSSage Weil req->r_args.setattr.old_size = 1738355da1ebSSage Weil cpu_to_le64(inode->i_size); 1739355da1ebSSage Weil mask |= CEPH_SETATTR_SIZE; 1740355da1ebSSage Weil release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | 1741355da1ebSSage Weil CEPH_CAP_FILE_WR; 1742355da1ebSSage Weil } 1743355da1ebSSage Weil } 1744355da1ebSSage Weil 1745355da1ebSSage Weil /* these do nothing */ 1746355da1ebSSage Weil if (ia_valid & ATTR_CTIME) { 1747355da1ebSSage Weil bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME| 1748355da1ebSSage Weil ATTR_MODE|ATTR_UID|ATTR_GID)) == 0; 1749355da1ebSSage Weil dout("setattr %p ctime %ld.%ld -> %ld.%ld (%s)\n", inode, 1750355da1ebSSage Weil inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, 1751355da1ebSSage Weil attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, 1752355da1ebSSage Weil only ? 
"ctime only" : "ignored"); 1753355da1ebSSage Weil inode->i_ctime = attr->ia_ctime; 1754355da1ebSSage Weil if (only) { 1755355da1ebSSage Weil /* 1756355da1ebSSage Weil * if kernel wants to dirty ctime but nothing else, 1757355da1ebSSage Weil * we need to choose a cap to dirty under, or do 1758355da1ebSSage Weil * a almost-no-op setattr 1759355da1ebSSage Weil */ 1760355da1ebSSage Weil if (issued & CEPH_CAP_AUTH_EXCL) 1761355da1ebSSage Weil dirtied |= CEPH_CAP_AUTH_EXCL; 1762355da1ebSSage Weil else if (issued & CEPH_CAP_FILE_EXCL) 1763355da1ebSSage Weil dirtied |= CEPH_CAP_FILE_EXCL; 1764355da1ebSSage Weil else if (issued & CEPH_CAP_XATTR_EXCL) 1765355da1ebSSage Weil dirtied |= CEPH_CAP_XATTR_EXCL; 1766355da1ebSSage Weil else 1767355da1ebSSage Weil mask |= CEPH_SETATTR_CTIME; 1768355da1ebSSage Weil } 1769355da1ebSSage Weil } 1770355da1ebSSage Weil if (ia_valid & ATTR_FILE) 1771355da1ebSSage Weil dout("setattr %p ATTR_FILE ... hrm!\n", inode); 1772355da1ebSSage Weil 1773355da1ebSSage Weil if (dirtied) { 1774fca65b4aSSage Weil inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied); 1775355da1ebSSage Weil inode->i_ctime = CURRENT_TIME; 1776355da1ebSSage Weil } 1777355da1ebSSage Weil 1778355da1ebSSage Weil release &= issued; 1779be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 1780355da1ebSSage Weil 1781fca65b4aSSage Weil if (inode_dirty_flags) 1782fca65b4aSSage Weil __mark_inode_dirty(inode, inode_dirty_flags); 1783fca65b4aSSage Weil 1784355da1ebSSage Weil if (mask) { 178570b666c3SSage Weil req->r_inode = inode; 178670b666c3SSage Weil ihold(inode); 1787355da1ebSSage Weil req->r_inode_drop = release; 1788355da1ebSSage Weil req->r_args.setattr.mask = cpu_to_le32(mask); 1789355da1ebSSage Weil req->r_num_caps = 1; 17905f21c96dSSage Weil parent_inode = ceph_get_dentry_parent_inode(dentry); 1791355da1ebSSage Weil err = ceph_mdsc_do_request(mdsc, parent_inode, req); 17925f21c96dSSage Weil iput(parent_inode); 1793355da1ebSSage Weil } 1794355da1ebSSage Weil dout("setattr %p 
result=%d (%s locally, %d remote)\n", inode, err, 1795355da1ebSSage Weil ceph_cap_string(dirtied), mask); 1796355da1ebSSage Weil 1797355da1ebSSage Weil ceph_mdsc_put_request(req); 1798b0d7c223SYan, Zheng if (mask & CEPH_SETATTR_SIZE) 1799b415bf4fSYan, Zheng __ceph_do_pending_vmtruncate(inode); 1800355da1ebSSage Weil return err; 1801355da1ebSSage Weil out: 1802be655596SSage Weil spin_unlock(&ci->i_ceph_lock); 1803355da1ebSSage Weil ceph_mdsc_put_request(req); 1804355da1ebSSage Weil return err; 1805355da1ebSSage Weil } 1806355da1ebSSage Weil 1807355da1ebSSage Weil /* 1808355da1ebSSage Weil * Verify that we have a lease on the given mask. If not, 1809355da1ebSSage Weil * do a getattr against an mds. 1810355da1ebSSage Weil */ 1811355da1ebSSage Weil int ceph_do_getattr(struct inode *inode, int mask) 1812355da1ebSSage Weil { 18133d14c5d2SYehuda Sadeh struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); 18143d14c5d2SYehuda Sadeh struct ceph_mds_client *mdsc = fsc->mdsc; 1815355da1ebSSage Weil struct ceph_mds_request *req; 1816355da1ebSSage Weil int err; 1817355da1ebSSage Weil 1818355da1ebSSage Weil if (ceph_snap(inode) == CEPH_SNAPDIR) { 1819355da1ebSSage Weil dout("do_getattr inode %p SNAPDIR\n", inode); 1820355da1ebSSage Weil return 0; 1821355da1ebSSage Weil } 1822355da1ebSSage Weil 1823b7495fc2SSage Weil dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode); 1824355da1ebSSage Weil if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) 1825355da1ebSSage Weil return 0; 1826355da1ebSSage Weil 1827355da1ebSSage Weil req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 1828355da1ebSSage Weil if (IS_ERR(req)) 1829355da1ebSSage Weil return PTR_ERR(req); 183070b666c3SSage Weil req->r_inode = inode; 183170b666c3SSage Weil ihold(inode); 1832355da1ebSSage Weil req->r_num_caps = 1; 1833355da1ebSSage Weil req->r_args.getattr.mask = cpu_to_le32(mask); 1834355da1ebSSage Weil err = ceph_mdsc_do_request(mdsc, NULL, req); 
1835355da1ebSSage Weil ceph_mdsc_put_request(req); 1836355da1ebSSage Weil dout("do_getattr result=%d\n", err); 1837355da1ebSSage Weil return err; 1838355da1ebSSage Weil } 1839355da1ebSSage Weil 1840355da1ebSSage Weil 1841355da1ebSSage Weil /* 1842355da1ebSSage Weil * Check inode permissions. We verify we have a valid value for 1843355da1ebSSage Weil * the AUTH cap, then call the generic handler. 1844355da1ebSSage Weil */ 184510556cb2SAl Viro int ceph_permission(struct inode *inode, int mask) 1846355da1ebSSage Weil { 1847b74c79e9SNick Piggin int err; 1848b74c79e9SNick Piggin 184910556cb2SAl Viro if (mask & MAY_NOT_BLOCK) 1850b74c79e9SNick Piggin return -ECHILD; 1851b74c79e9SNick Piggin 1852b74c79e9SNick Piggin err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); 1853355da1ebSSage Weil 1854355da1ebSSage Weil if (!err) 18552830ba7fSAl Viro err = generic_permission(inode, mask); 1856355da1ebSSage Weil return err; 1857355da1ebSSage Weil } 1858355da1ebSSage Weil 1859355da1ebSSage Weil /* 1860355da1ebSSage Weil * Get all attributes. Hopefully somedata we'll have a statlite() 1861355da1ebSSage Weil * and can limit the fields we require to be accurate. 
1862355da1ebSSage Weil */ 1863355da1ebSSage Weil int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, 1864355da1ebSSage Weil struct kstat *stat) 1865355da1ebSSage Weil { 1866355da1ebSSage Weil struct inode *inode = dentry->d_inode; 1867232d4b01SSage Weil struct ceph_inode_info *ci = ceph_inode(inode); 1868355da1ebSSage Weil int err; 1869355da1ebSSage Weil 1870355da1ebSSage Weil err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL); 1871355da1ebSSage Weil if (!err) { 1872355da1ebSSage Weil generic_fillattr(inode, stat); 1873ad1fee96SYehuda Sadeh stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino); 1874355da1ebSSage Weil if (ceph_snap(inode) != CEPH_NOSNAP) 1875355da1ebSSage Weil stat->dev = ceph_snap(inode); 1876355da1ebSSage Weil else 1877355da1ebSSage Weil stat->dev = 0; 1878232d4b01SSage Weil if (S_ISDIR(inode->i_mode)) { 18791c1266bbSYehuda Sadeh if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), 18801c1266bbSYehuda Sadeh RBYTES)) 1881232d4b01SSage Weil stat->size = ci->i_rbytes; 18821c1266bbSYehuda Sadeh else 18831c1266bbSYehuda Sadeh stat->size = ci->i_files + ci->i_subdirs; 1884232d4b01SSage Weil stat->blocks = 0; 1885355da1ebSSage Weil stat->blksize = 65536; 1886355da1ebSSage Weil } 1887232d4b01SSage Weil } 1888355da1ebSSage Weil return err; 1889355da1ebSSage Weil } 1890