1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
23d14c5d2SYehuda Sadeh #include <linux/ceph/ceph_debug.h>
32817b000SSage Weil
42817b000SSage Weil #include <linux/spinlock.h>
52817b000SSage Weil #include <linux/namei.h>
65a0e3ad6STejun Heo #include <linux/slab.h>
72817b000SSage Weil #include <linux/sched.h>
82cdeb1e4SAndreas Gruenbacher #include <linux/xattr.h>
92817b000SSage Weil
102817b000SSage Weil #include "super.h"
113d14c5d2SYehuda Sadeh #include "mds_client.h"
12af9ffa6dSXiubo Li #include "crypto.h"
132817b000SSage Weil
/*
 * Directory operations: readdir, lookup, create, link, unlink,
 * rename, etc.
 */
182817b000SSage Weil
/*
 * Ceph MDS operations are specified in terms of a base ino and
 * relative path.  Thus, the client can specify an operation on a
 * specific inode (e.g., a getattr due to fstat(2)), or as a path
 * relative to, say, the root directory.
 *
 * Normally, we limit ourselves to strict inode ops (no path component)
 * or dentry operations (a single path component relative to an ino).  The
 * exception to this is open_root_dentry(), which will open the mount
 * point by name.
 */
302817b000SSage Weil
3152dfb8acSSage Weil const struct dentry_operations ceph_dentry_ops;
322817b000SSage Weil
3337c4efc1SYan, Zheng static bool __dentry_lease_is_valid(struct ceph_dentry_info *di);
3437c4efc1SYan, Zheng static int __dir_lease_try_check(const struct dentry *dentry);
3537c4efc1SYan, Zheng
362817b000SSage Weil /*
372817b000SSage Weil * Initialize ceph dentry state.
382817b000SSage Weil */
ceph_d_init(struct dentry * dentry)39ad5cb123SAl Viro static int ceph_d_init(struct dentry *dentry)
402817b000SSage Weil {
412817b000SSage Weil struct ceph_dentry_info *di;
422678da88SXiubo Li struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dentry->d_sb);
432817b000SSage Weil
4499ec2697SGeliang Tang di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL);
452817b000SSage Weil if (!di)
462817b000SSage Weil return -ENOMEM; /* oh well */
472817b000SSage Weil
482817b000SSage Weil di->dentry = dentry;
492817b000SSage Weil di->lease_session = NULL;
509b16f03cSMiklos Szeredi di->time = jiffies;
5148d0cbd1SSage Weil dentry->d_fsdata = di;
5237c4efc1SYan, Zheng INIT_LIST_HEAD(&di->lease_list);
53f9009efaSXiubo Li
54f9009efaSXiubo Li atomic64_inc(&mdsc->metric.total_dentries);
55f9009efaSXiubo Li
562817b000SSage Weil return 0;
572817b000SSage Weil }
582817b000SSage Weil
592817b000SSage Weil /*
60f3c4ebe6SYan, Zheng * for f_pos for readdir:
61f3c4ebe6SYan, Zheng * - hash order:
62f3c4ebe6SYan, Zheng * (0xff << 52) | ((24 bits hash) << 28) |
63f3c4ebe6SYan, Zheng * (the nth entry has hash collision);
64f3c4ebe6SYan, Zheng * - frag+name order;
65f3c4ebe6SYan, Zheng * ((frag value) << 28) | (the nth entry in frag);
662817b000SSage Weil */
67f3c4ebe6SYan, Zheng #define OFFSET_BITS 28
68f3c4ebe6SYan, Zheng #define OFFSET_MASK ((1 << OFFSET_BITS) - 1)
69f3c4ebe6SYan, Zheng #define HASH_ORDER (0xffull << (OFFSET_BITS + 24))
ceph_make_fpos(unsigned high,unsigned off,bool hash_order)70f3c4ebe6SYan, Zheng loff_t ceph_make_fpos(unsigned high, unsigned off, bool hash_order)
71f3c4ebe6SYan, Zheng {
72f3c4ebe6SYan, Zheng loff_t fpos = ((loff_t)high << 28) | (loff_t)off;
73f3c4ebe6SYan, Zheng if (hash_order)
74f3c4ebe6SYan, Zheng fpos |= HASH_ORDER;
75f3c4ebe6SYan, Zheng return fpos;
76f3c4ebe6SYan, Zheng }
77f3c4ebe6SYan, Zheng
is_hash_order(loff_t p)78f3c4ebe6SYan, Zheng static bool is_hash_order(loff_t p)
79f3c4ebe6SYan, Zheng {
80f3c4ebe6SYan, Zheng return (p & HASH_ORDER) == HASH_ORDER;
81f3c4ebe6SYan, Zheng }
82f3c4ebe6SYan, Zheng
/*
 * Extract the part of position @p that sits above the low OFFSET_BITS
 * bits.  The result is truncated to unsigned on return.
 */
static unsigned fpos_frag(loff_t p)
{
	return p >> OFFSET_BITS;
}
87f3c4ebe6SYan, Zheng
/*
 * Hash component of position @p: the upper field of the position passed
 * through ceph_frag_value().
 */
static unsigned fpos_hash(loff_t p)
{
	return ceph_frag_value(fpos_frag(p));
}
92f3c4ebe6SYan, Zheng
/* Entry offset of position @p: its low OFFSET_BITS bits. */
static unsigned fpos_off(loff_t p)
{
	return p & OFFSET_MASK;
}
972817b000SSage Weil
/*
 * Order two readdir positions: first by their frag fields using
 * ceph_frag_compare(), then, for equal frags, by entry offset.
 *
 * Returns a negative, zero or positive value.  The offset difference is
 * safe to narrow to int because fpos_off() yields at most OFFSET_BITS
 * (28) bits.
 */
static int fpos_cmp(loff_t l, loff_t r)
{
	int v = ceph_frag_compare(fpos_frag(l), fpos_frag(r));

	if (v)
		return v;
	return (int)(fpos_off(l) - fpos_off(r));
}
1054d5f5df6SYan, Zheng
1062817b000SSage Weil /*
107fdd4e158SYan, Zheng * make note of the last dentry we read, so we can
108fdd4e158SYan, Zheng * continue at the same lexicographical point,
109fdd4e158SYan, Zheng * regardless of what dir changes take place on the
110fdd4e158SYan, Zheng * server.
111fdd4e158SYan, Zheng */
note_last_dentry(struct ceph_dir_file_info * dfi,const char * name,int len,unsigned next_offset)112bb48bd4dSChengguang Xu static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name,
113fdd4e158SYan, Zheng int len, unsigned next_offset)
114fdd4e158SYan, Zheng {
115fdd4e158SYan, Zheng char *buf = kmalloc(len+1, GFP_KERNEL);
116fdd4e158SYan, Zheng if (!buf)
117fdd4e158SYan, Zheng return -ENOMEM;
118bb48bd4dSChengguang Xu kfree(dfi->last_name);
119bb48bd4dSChengguang Xu dfi->last_name = buf;
120bb48bd4dSChengguang Xu memcpy(dfi->last_name, name, len);
121bb48bd4dSChengguang Xu dfi->last_name[len] = 0;
122bb48bd4dSChengguang Xu dfi->next_offset = next_offset;
123bb48bd4dSChengguang Xu dout("note_last_dentry '%s'\n", dfi->last_name);
124fdd4e158SYan, Zheng return 0;
125fdd4e158SYan, Zheng }
126fdd4e158SYan, Zheng
127c530cd24SYan, Zheng
128c530cd24SYan, Zheng static struct dentry *
__dcache_find_get_entry(struct dentry * parent,u64 idx,struct ceph_readdir_cache_control * cache_ctl)129c530cd24SYan, Zheng __dcache_find_get_entry(struct dentry *parent, u64 idx,
130c530cd24SYan, Zheng struct ceph_readdir_cache_control *cache_ctl)
131c530cd24SYan, Zheng {
132c530cd24SYan, Zheng struct inode *dir = d_inode(parent);
133c530cd24SYan, Zheng struct dentry *dentry;
134c530cd24SYan, Zheng unsigned idx_mask = (PAGE_SIZE / sizeof(struct dentry *)) - 1;
135c530cd24SYan, Zheng loff_t ptr_pos = idx * sizeof(struct dentry *);
136c530cd24SYan, Zheng pgoff_t ptr_pgoff = ptr_pos >> PAGE_SHIFT;
137c530cd24SYan, Zheng
138c530cd24SYan, Zheng if (ptr_pos >= i_size_read(dir))
139c530cd24SYan, Zheng return NULL;
140c530cd24SYan, Zheng
141c530cd24SYan, Zheng if (!cache_ctl->page || ptr_pgoff != page_index(cache_ctl->page)) {
142c530cd24SYan, Zheng ceph_readdir_cache_release(cache_ctl);
143c530cd24SYan, Zheng cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff);
144c530cd24SYan, Zheng if (!cache_ctl->page) {
145c530cd24SYan, Zheng dout(" page %lu not found\n", ptr_pgoff);
146c530cd24SYan, Zheng return ERR_PTR(-EAGAIN);
147c530cd24SYan, Zheng }
148c530cd24SYan, Zheng /* reading/filling the cache are serialized by
149810313c5Shongnanli i_rwsem, no need to use page lock */
150c530cd24SYan, Zheng unlock_page(cache_ctl->page);
151c530cd24SYan, Zheng cache_ctl->dentries = kmap(cache_ctl->page);
152c530cd24SYan, Zheng }
153c530cd24SYan, Zheng
154c530cd24SYan, Zheng cache_ctl->index = idx & idx_mask;
155c530cd24SYan, Zheng
156c530cd24SYan, Zheng rcu_read_lock();
157c530cd24SYan, Zheng spin_lock(&parent->d_lock);
158c530cd24SYan, Zheng /* check i_size again here, because empty directory can be
159810313c5Shongnanli * marked as complete while not holding the i_rwsem. */
160c530cd24SYan, Zheng if (ceph_dir_is_complete_ordered(dir) && ptr_pos < i_size_read(dir))
161c530cd24SYan, Zheng dentry = cache_ctl->dentries[cache_ctl->index];
162c530cd24SYan, Zheng else
163c530cd24SYan, Zheng dentry = NULL;
164c530cd24SYan, Zheng spin_unlock(&parent->d_lock);
165c530cd24SYan, Zheng if (dentry && !lockref_get_not_dead(&dentry->d_lockref))
166c530cd24SYan, Zheng dentry = NULL;
167c530cd24SYan, Zheng rcu_read_unlock();
168c530cd24SYan, Zheng return dentry ? : ERR_PTR(-EAGAIN);
169c530cd24SYan, Zheng }
170c530cd24SYan, Zheng
171fdd4e158SYan, Zheng /*
1722817b000SSage Weil * When possible, we try to satisfy a readdir by peeking at the
1732817b000SSage Weil * dcache. We make this work by carefully ordering dentries on
174946e51f2SAl Viro * d_child when we initially get results back from the MDS, and
1752817b000SSage Weil * falling back to a "normal" sync readdir if any dentries in the dir
1762817b000SSage Weil * are dropped.
1772817b000SSage Weil *
1782f276c51SYan, Zheng * Complete dir indicates that we have all dentries in the dir. It is
1792817b000SSage Weil * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
1802817b000SSage Weil * the MDS if/when the directory is modified).
1812817b000SSage Weil */
__dcache_readdir(struct file * file,struct dir_context * ctx,int shared_gen)182a30be7cbSYan, Zheng static int __dcache_readdir(struct file *file, struct dir_context *ctx,
18397aeb6bfSYan, Zheng int shared_gen)
1842817b000SSage Weil {
185bb48bd4dSChengguang Xu struct ceph_dir_file_info *dfi = file->private_data;
186b583043eSAl Viro struct dentry *parent = file->f_path.dentry;
1872b0143b5SDavid Howells struct inode *dir = d_inode(parent);
188fdd4e158SYan, Zheng struct dentry *dentry, *last = NULL;
1892817b000SSage Weil struct ceph_dentry_info *di;
190fdd4e158SYan, Zheng struct ceph_readdir_cache_control cache_ctl = {};
191c530cd24SYan, Zheng u64 idx = 0;
192c530cd24SYan, Zheng int err = 0;
1932817b000SSage Weil
19497aeb6bfSYan, Zheng dout("__dcache_readdir %p v%u at %llx\n", dir, (unsigned)shared_gen, ctx->pos);
1952817b000SSage Weil
196c530cd24SYan, Zheng /* search start position */
197c530cd24SYan, Zheng if (ctx->pos > 2) {
198c530cd24SYan, Zheng u64 count = div_u64(i_size_read(dir), sizeof(struct dentry *));
199c530cd24SYan, Zheng while (count > 0) {
200c530cd24SYan, Zheng u64 step = count >> 1;
201c530cd24SYan, Zheng dentry = __dcache_find_get_entry(parent, idx + step,
202c530cd24SYan, Zheng &cache_ctl);
203c530cd24SYan, Zheng if (!dentry) {
204c530cd24SYan, Zheng /* use linar search */
205c530cd24SYan, Zheng idx = 0;
206c530cd24SYan, Zheng break;
207c530cd24SYan, Zheng }
208c530cd24SYan, Zheng if (IS_ERR(dentry)) {
209c530cd24SYan, Zheng err = PTR_ERR(dentry);
210c530cd24SYan, Zheng goto out;
211c530cd24SYan, Zheng }
212c530cd24SYan, Zheng di = ceph_dentry(dentry);
213c530cd24SYan, Zheng spin_lock(&dentry->d_lock);
214c530cd24SYan, Zheng if (fpos_cmp(di->offset, ctx->pos) < 0) {
215c530cd24SYan, Zheng idx += step + 1;
216c530cd24SYan, Zheng count -= step + 1;
217c530cd24SYan, Zheng } else {
218c530cd24SYan, Zheng count = step;
219c530cd24SYan, Zheng }
220c530cd24SYan, Zheng spin_unlock(&dentry->d_lock);
221c530cd24SYan, Zheng dput(dentry);
2222817b000SSage Weil }
2232817b000SSage Weil
224c530cd24SYan, Zheng dout("__dcache_readdir %p cache idx %llu\n", dir, idx);
225c530cd24SYan, Zheng }
226fdd4e158SYan, Zheng
227c530cd24SYan, Zheng
228c530cd24SYan, Zheng for (;;) {
229c530cd24SYan, Zheng bool emit_dentry = false;
230c530cd24SYan, Zheng dentry = __dcache_find_get_entry(parent, idx++, &cache_ctl);
231c530cd24SYan, Zheng if (!dentry) {
232bb48bd4dSChengguang Xu dfi->file_info.flags |= CEPH_F_ATEND;
233fdd4e158SYan, Zheng err = 0;
234fdd4e158SYan, Zheng break;
2352817b000SSage Weil }
236c530cd24SYan, Zheng if (IS_ERR(dentry)) {
237c530cd24SYan, Zheng err = PTR_ERR(dentry);
238c530cd24SYan, Zheng goto out;
239fdd4e158SYan, Zheng }
240fdd4e158SYan, Zheng
241fdd4e158SYan, Zheng spin_lock(&dentry->d_lock);
2425495c2d0SYan, Zheng di = ceph_dentry(dentry);
2435495c2d0SYan, Zheng if (d_unhashed(dentry) ||
2445495c2d0SYan, Zheng d_really_is_negative(dentry) ||
245af9ffa6dSXiubo Li di->lease_shared_gen != shared_gen ||
246af9ffa6dSXiubo Li ((dentry->d_flags & DCACHE_NOKEY_NAME) &&
247af9ffa6dSXiubo Li fscrypt_has_encryption_key(dir))) {
2485495c2d0SYan, Zheng spin_unlock(&dentry->d_lock);
2495495c2d0SYan, Zheng dput(dentry);
2505495c2d0SYan, Zheng err = -EAGAIN;
2515495c2d0SYan, Zheng goto out;
2525495c2d0SYan, Zheng }
2535495c2d0SYan, Zheng if (fpos_cmp(ctx->pos, di->offset) <= 0) {
25437c4efc1SYan, Zheng __ceph_dentry_dir_lease_touch(di);
255fdd4e158SYan, Zheng emit_dentry = true;
2562817b000SSage Weil }
257b7ab39f6SNick Piggin spin_unlock(&dentry->d_lock);
2582817b000SSage Weil
259fdd4e158SYan, Zheng if (emit_dentry) {
260f3c4ebe6SYan, Zheng dout(" %llx dentry %p %pd %p\n", di->offset,
2612b0143b5SDavid Howells dentry, dentry, d_inode(dentry));
262fdd4e158SYan, Zheng ctx->pos = di->offset;
26377acfa29SAl Viro if (!dir_emit(ctx, dentry->d_name.name,
264ebce3eb2SJeff Layton dentry->d_name.len, ceph_present_inode(d_inode(dentry)),
2652b0143b5SDavid Howells d_inode(dentry)->i_mode >> 12)) {
26677acfa29SAl Viro dput(dentry);
267fdd4e158SYan, Zheng err = 0;
268fdd4e158SYan, Zheng break;
26977acfa29SAl Viro }
270fdd4e158SYan, Zheng ctx->pos++;
2710081bd83SYan, Zheng
27277acfa29SAl Viro if (last)
2732817b000SSage Weil dput(last);
274f5b06628SSage Weil last = dentry;
275fdd4e158SYan, Zheng } else {
276fdd4e158SYan, Zheng dput(dentry);
2772817b000SSage Weil }
278fdd4e158SYan, Zheng }
279c530cd24SYan, Zheng out:
280fdd4e158SYan, Zheng ceph_readdir_cache_release(&cache_ctl);
281fdd4e158SYan, Zheng if (last) {
282fdd4e158SYan, Zheng int ret;
283fdd4e158SYan, Zheng di = ceph_dentry(last);
284bb48bd4dSChengguang Xu ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len,
285fdd4e158SYan, Zheng fpos_off(di->offset) + 1);
286fdd4e158SYan, Zheng if (ret < 0)
287fdd4e158SYan, Zheng err = ret;
288fdd4e158SYan, Zheng dput(last);
28984583cfbSYan, Zheng /* last_name no longer match cache index */
290bb48bd4dSChengguang Xu if (dfi->readdir_cache_idx >= 0) {
291bb48bd4dSChengguang Xu dfi->readdir_cache_idx = -1;
292bb48bd4dSChengguang Xu dfi->dir_release_count = 0;
29384583cfbSYan, Zheng }
294fdd4e158SYan, Zheng }
295fdd4e158SYan, Zheng return err;
2962817b000SSage Weil }
2972817b000SSage Weil
need_send_readdir(struct ceph_dir_file_info * dfi,loff_t pos)298bb48bd4dSChengguang Xu static bool need_send_readdir(struct ceph_dir_file_info *dfi, loff_t pos)
299f3c4ebe6SYan, Zheng {
300bb48bd4dSChengguang Xu if (!dfi->last_readdir)
301f3c4ebe6SYan, Zheng return true;
302f3c4ebe6SYan, Zheng if (is_hash_order(pos))
303bb48bd4dSChengguang Xu return !ceph_frag_contains_value(dfi->frag, fpos_hash(pos));
304f3c4ebe6SYan, Zheng else
305bb48bd4dSChengguang Xu return dfi->frag != fpos_frag(pos);
306f3c4ebe6SYan, Zheng }
307f3c4ebe6SYan, Zheng
ceph_readdir(struct file * file,struct dir_context * ctx)30877acfa29SAl Viro static int ceph_readdir(struct file *file, struct dir_context *ctx)
3092817b000SSage Weil {
310bb48bd4dSChengguang Xu struct ceph_dir_file_info *dfi = file->private_data;
31177acfa29SAl Viro struct inode *inode = file_inode(file);
3122817b000SSage Weil struct ceph_inode_info *ci = ceph_inode(inode);
313*985b9ee8SXiubo Li struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
3143d14c5d2SYehuda Sadeh struct ceph_mds_client *mdsc = fsc->mdsc;
3158974eebdSYan, Zheng int i;
3162817b000SSage Weil int err;
317b50c2de5SYan, Zheng unsigned frag = -1;
3182817b000SSage Weil struct ceph_mds_reply_info_parsed *rinfo;
3192817b000SSage Weil
3208974eebdSYan, Zheng dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
321bb48bd4dSChengguang Xu if (dfi->file_info.flags & CEPH_F_ATEND)
3222817b000SSage Weil return 0;
3232817b000SSage Weil
3242817b000SSage Weil /* always start with . and .. */
32577acfa29SAl Viro if (ctx->pos == 0) {
3262817b000SSage Weil dout("readdir off 0 -> '.'\n");
327ebce3eb2SJeff Layton if (!dir_emit(ctx, ".", 1, ceph_present_inode(inode),
32877acfa29SAl Viro inode->i_mode >> 12))
3292817b000SSage Weil return 0;
33077acfa29SAl Viro ctx->pos = 1;
3312817b000SSage Weil }
33277acfa29SAl Viro if (ctx->pos == 1) {
333ebce3eb2SJeff Layton u64 ino;
334ebce3eb2SJeff Layton struct dentry *dentry = file->f_path.dentry;
335ebce3eb2SJeff Layton
336ebce3eb2SJeff Layton spin_lock(&dentry->d_lock);
337ebce3eb2SJeff Layton ino = ceph_present_inode(dentry->d_parent->d_inode);
338ebce3eb2SJeff Layton spin_unlock(&dentry->d_lock);
339ebce3eb2SJeff Layton
3402817b000SSage Weil dout("readdir off 1 -> '..'\n");
341ebce3eb2SJeff Layton if (!dir_emit(ctx, "..", 2, ino, inode->i_mode >> 12))
3422817b000SSage Weil return 0;
34377acfa29SAl Viro ctx->pos = 2;
3442817b000SSage Weil }
3452817b000SSage Weil
34614e034a6SLuís Henriques err = ceph_fscrypt_prepare_readdir(inode);
34714e034a6SLuís Henriques if (err < 0)
348af9ffa6dSXiubo Li return err;
349af9ffa6dSXiubo Li
350be655596SSage Weil spin_lock(&ci->i_ceph_lock);
351719a2514SYan, Zheng /* request Fx cap. if have Fx, we don't need to release Fs cap
352719a2514SYan, Zheng * for later create/unlink. */
353719a2514SYan, Zheng __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_WR);
354719a2514SYan, Zheng /* can we use the dcache? */
355fdd4e158SYan, Zheng if (ceph_test_mount_opt(fsc, DCACHE) &&
3563d14c5d2SYehuda Sadeh !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
357a0dff78dSSage Weil ceph_snap(inode) != CEPH_SNAPDIR &&
35870db4f36SYan, Zheng __ceph_dir_is_complete_ordered(ci) &&
3591af16d54SXiubo Li __ceph_caps_issued_mask_metric(ci, CEPH_CAP_FILE_SHARED, 1)) {
36097aeb6bfSYan, Zheng int shared_gen = atomic_read(&ci->i_shared_gen);
3611af16d54SXiubo Li
362be655596SSage Weil spin_unlock(&ci->i_ceph_lock);
363a30be7cbSYan, Zheng err = __dcache_readdir(file, ctx, shared_gen);
364efa4c120SSage Weil if (err != -EAGAIN)
3652817b000SSage Weil return err;
366efa4c120SSage Weil } else {
367be655596SSage Weil spin_unlock(&ci->i_ceph_lock);
368efa4c120SSage Weil }
3692817b000SSage Weil
3702817b000SSage Weil /* proceed with a normal readdir */
3712817b000SSage Weil more:
3722817b000SSage Weil /* do we have the correct frag content buffered? */
373bb48bd4dSChengguang Xu if (need_send_readdir(dfi, ctx->pos)) {
3742817b000SSage Weil struct ceph_mds_request *req;
3752817b000SSage Weil int op = ceph_snap(inode) == CEPH_SNAPDIR ?
3762817b000SSage Weil CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
3772817b000SSage Weil
3782817b000SSage Weil /* discard old result, if any */
379bb48bd4dSChengguang Xu if (dfi->last_readdir) {
380bb48bd4dSChengguang Xu ceph_mdsc_put_request(dfi->last_readdir);
381bb48bd4dSChengguang Xu dfi->last_readdir = NULL;
382393f6620SSage Weil }
3832817b000SSage Weil
384f3c4ebe6SYan, Zheng if (is_hash_order(ctx->pos)) {
385b50c2de5SYan, Zheng /* fragtree isn't always accurate. choose frag
386b50c2de5SYan, Zheng * based on previous reply when possible. */
387b50c2de5SYan, Zheng if (frag == (unsigned)-1)
388f3c4ebe6SYan, Zheng frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
389f3c4ebe6SYan, Zheng NULL, NULL);
390f3c4ebe6SYan, Zheng } else {
391f3c4ebe6SYan, Zheng frag = fpos_frag(ctx->pos);
392f3c4ebe6SYan, Zheng }
393f3c4ebe6SYan, Zheng
3942817b000SSage Weil dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
395bb48bd4dSChengguang Xu ceph_vinop(inode), frag, dfi->last_name);
3962817b000SSage Weil req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
3972817b000SSage Weil if (IS_ERR(req))
3982817b000SSage Weil return PTR_ERR(req);
399af9ffa6dSXiubo Li
40054008399SYan, Zheng err = ceph_alloc_readdir_reply_buffer(req, inode);
40154008399SYan, Zheng if (err) {
40254008399SYan, Zheng ceph_mdsc_put_request(req);
40354008399SYan, Zheng return err;
40454008399SYan, Zheng }
4052817b000SSage Weil /* hints to request -> mds selection code */
4062817b000SSage Weil req->r_direct_mode = USE_AUTH_MDS;
4075d37ca14SYan, Zheng if (op == CEPH_MDS_OP_READDIR) {
4082817b000SSage Weil req->r_direct_hash = ceph_frag_value(frag);
409bc2de10dSJeff Layton __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
41087c91a96SYan, Zheng req->r_inode_drop = CEPH_CAP_FILE_EXCL;
4115d37ca14SYan, Zheng }
412bb48bd4dSChengguang Xu if (dfi->last_name) {
413af9ffa6dSXiubo Li struct qstr d_name = { .name = dfi->last_name,
414af9ffa6dSXiubo Li .len = strlen(dfi->last_name) };
415af9ffa6dSXiubo Li
416af9ffa6dSXiubo Li req->r_path2 = kzalloc(NAME_MAX + 1, GFP_KERNEL);
417a149bb9aSSanidhya Kashyap if (!req->r_path2) {
418a149bb9aSSanidhya Kashyap ceph_mdsc_put_request(req);
419a149bb9aSSanidhya Kashyap return -ENOMEM;
420a149bb9aSSanidhya Kashyap }
421af9ffa6dSXiubo Li
422af9ffa6dSXiubo Li err = ceph_encode_encrypted_dname(inode, &d_name,
423af9ffa6dSXiubo Li req->r_path2);
424af9ffa6dSXiubo Li if (err < 0) {
425af9ffa6dSXiubo Li ceph_mdsc_put_request(req);
426af9ffa6dSXiubo Li return err;
427af9ffa6dSXiubo Li }
42879162547SYan, Zheng } else if (is_hash_order(ctx->pos)) {
42979162547SYan, Zheng req->r_args.readdir.offset_hash =
43079162547SYan, Zheng cpu_to_le32(fpos_hash(ctx->pos));
431a149bb9aSSanidhya Kashyap }
43279162547SYan, Zheng
433bb48bd4dSChengguang Xu req->r_dir_release_cnt = dfi->dir_release_count;
434bb48bd4dSChengguang Xu req->r_dir_ordered_cnt = dfi->dir_ordered_count;
435bb48bd4dSChengguang Xu req->r_readdir_cache_idx = dfi->readdir_cache_idx;
436bb48bd4dSChengguang Xu req->r_readdir_offset = dfi->next_offset;
4372817b000SSage Weil req->r_args.readdir.frag = cpu_to_le32(frag);
438956d39d6SYan, Zheng req->r_args.readdir.flags =
439956d39d6SYan, Zheng cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
440a149bb9aSSanidhya Kashyap
441a149bb9aSSanidhya Kashyap req->r_inode = inode;
442a149bb9aSSanidhya Kashyap ihold(inode);
443a149bb9aSSanidhya Kashyap req->r_dentry = dget(file->f_path.dentry);
4442817b000SSage Weil err = ceph_mdsc_do_request(mdsc, NULL, req);
4452817b000SSage Weil if (err < 0) {
4462817b000SSage Weil ceph_mdsc_put_request(req);
4472817b000SSage Weil return err;
4482817b000SSage Weil }
449f3c4ebe6SYan, Zheng dout("readdir got and parsed readdir result=%d on "
450f3c4ebe6SYan, Zheng "frag %x, end=%d, complete=%d, hash_order=%d\n",
451f3c4ebe6SYan, Zheng err, frag,
4522817b000SSage Weil (int)req->r_reply_info.dir_end,
453f3c4ebe6SYan, Zheng (int)req->r_reply_info.dir_complete,
454f3c4ebe6SYan, Zheng (int)req->r_reply_info.hash_order);
4552817b000SSage Weil
45681c6aea5SYan, Zheng rinfo = &req->r_reply_info;
45781c6aea5SYan, Zheng if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
45881c6aea5SYan, Zheng frag = le32_to_cpu(rinfo->dir_dir->frag);
459f3c4ebe6SYan, Zheng if (!rinfo->hash_order) {
460bb48bd4dSChengguang Xu dfi->next_offset = req->r_readdir_offset;
4618974eebdSYan, Zheng /* adjust ctx->pos to beginning of frag */
462f3c4ebe6SYan, Zheng ctx->pos = ceph_make_fpos(frag,
463bb48bd4dSChengguang Xu dfi->next_offset,
464f3c4ebe6SYan, Zheng false);
465f3c4ebe6SYan, Zheng }
46681c6aea5SYan, Zheng }
467fdd4e158SYan, Zheng
468bb48bd4dSChengguang Xu dfi->frag = frag;
469bb48bd4dSChengguang Xu dfi->last_readdir = req;
4702817b000SSage Weil
471bc2de10dSJeff Layton if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
472bb48bd4dSChengguang Xu dfi->readdir_cache_idx = req->r_readdir_cache_idx;
473bb48bd4dSChengguang Xu if (dfi->readdir_cache_idx < 0) {
474fdd4e158SYan, Zheng /* preclude from marking dir ordered */
475bb48bd4dSChengguang Xu dfi->dir_ordered_count = 0;
4768974eebdSYan, Zheng } else if (ceph_frag_is_leftmost(frag) &&
477bb48bd4dSChengguang Xu dfi->next_offset == 2) {
478fdd4e158SYan, Zheng /* note dir version at start of readdir so
479fdd4e158SYan, Zheng * we can tell if any dentries get dropped */
480bb48bd4dSChengguang Xu dfi->dir_release_count = req->r_dir_release_cnt;
481bb48bd4dSChengguang Xu dfi->dir_ordered_count = req->r_dir_ordered_cnt;
482fdd4e158SYan, Zheng }
483fdd4e158SYan, Zheng } else {
4844c069a58SChengguang Xu dout("readdir !did_prepopulate\n");
485fdd4e158SYan, Zheng /* disable readdir cache */
486bb48bd4dSChengguang Xu dfi->readdir_cache_idx = -1;
487fdd4e158SYan, Zheng /* preclude from marking dir complete */
488bb48bd4dSChengguang Xu dfi->dir_release_count = 0;
489fdd4e158SYan, Zheng }
490fdd4e158SYan, Zheng
491f3c4ebe6SYan, Zheng /* note next offset and last dentry name */
492f3c4ebe6SYan, Zheng if (rinfo->dir_nr > 0) {
4932a5beea3SYan, Zheng struct ceph_mds_reply_dir_entry *rde =
4942a5beea3SYan, Zheng rinfo->dir_entries + (rinfo->dir_nr-1);
495f3c4ebe6SYan, Zheng unsigned next_offset = req->r_reply_info.dir_end ?
496f3c4ebe6SYan, Zheng 2 : (fpos_off(rde->offset) + 1);
497bb48bd4dSChengguang Xu err = note_last_dentry(dfi, rde->name, rde->name_len,
498f3c4ebe6SYan, Zheng next_offset);
499f639d986SXiubo Li if (err) {
500f639d986SXiubo Li ceph_mdsc_put_request(dfi->last_readdir);
501f639d986SXiubo Li dfi->last_readdir = NULL;
5022817b000SSage Weil return err;
503f639d986SXiubo Li }
504f3c4ebe6SYan, Zheng } else if (req->r_reply_info.dir_end) {
505bb48bd4dSChengguang Xu dfi->next_offset = 2;
506f3c4ebe6SYan, Zheng /* keep last name */
5072817b000SSage Weil }
5082817b000SSage Weil }
5092817b000SSage Weil
510bb48bd4dSChengguang Xu rinfo = &dfi->last_readdir->r_reply_info;
5118974eebdSYan, Zheng dout("readdir frag %x num %d pos %llx chunk first %llx\n",
512bb48bd4dSChengguang Xu dfi->frag, rinfo->dir_nr, ctx->pos,
5138974eebdSYan, Zheng rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL);
51477acfa29SAl Viro
5158974eebdSYan, Zheng i = 0;
5168974eebdSYan, Zheng /* search start position */
5178974eebdSYan, Zheng if (rinfo->dir_nr > 0) {
5188974eebdSYan, Zheng int step, nr = rinfo->dir_nr;
5198974eebdSYan, Zheng while (nr > 0) {
5208974eebdSYan, Zheng step = nr >> 1;
5218974eebdSYan, Zheng if (rinfo->dir_entries[i + step].offset < ctx->pos) {
5228974eebdSYan, Zheng i += step + 1;
5238974eebdSYan, Zheng nr -= step + 1;
5248974eebdSYan, Zheng } else {
5258974eebdSYan, Zheng nr = step;
5268974eebdSYan, Zheng }
5278974eebdSYan, Zheng }
5288974eebdSYan, Zheng }
5298974eebdSYan, Zheng for (; i < rinfo->dir_nr; i++) {
5308974eebdSYan, Zheng struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
5313105c19cSSage Weil
532af9ffa6dSXiubo Li if (rde->offset < ctx->pos) {
533af9ffa6dSXiubo Li pr_warn("%s: rde->offset 0x%llx ctx->pos 0x%llx\n",
534af9ffa6dSXiubo Li __func__, rde->offset, ctx->pos);
535af9ffa6dSXiubo Li return -EIO;
536af9ffa6dSXiubo Li }
537af9ffa6dSXiubo Li
538af9ffa6dSXiubo Li if (WARN_ON_ONCE(!rde->inode.in))
539af9ffa6dSXiubo Li return -EIO;
5408974eebdSYan, Zheng
5418974eebdSYan, Zheng ctx->pos = rde->offset;
5428974eebdSYan, Zheng dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
5438974eebdSYan, Zheng i, rinfo->dir_nr, ctx->pos,
5442a5beea3SYan, Zheng rde->name_len, rde->name, &rde->inode.in);
5458974eebdSYan, Zheng
5462a5beea3SYan, Zheng if (!dir_emit(ctx, rde->name, rde->name_len,
547ebce3eb2SJeff Layton ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
548ebce3eb2SJeff Layton le32_to_cpu(rde->inode.in->mode) >> 12)) {
549f639d986SXiubo Li /*
550f639d986SXiubo Li * NOTE: Here no need to put the 'dfi->last_readdir',
551f639d986SXiubo Li * because when dir_emit stops us it's most likely
552f639d986SXiubo Li * doesn't have enough memory, etc. So for next readdir
553f639d986SXiubo Li * it will continue.
554f639d986SXiubo Li */
5552817b000SSage Weil dout("filldir stopping us...\n");
5562817b000SSage Weil return 0;
5572817b000SSage Weil }
558af9ffa6dSXiubo Li
559af9ffa6dSXiubo Li /* Reset the lengths to their original allocated vals */
56077acfa29SAl Viro ctx->pos++;
5612817b000SSage Weil }
5622817b000SSage Weil
563bb48bd4dSChengguang Xu ceph_mdsc_put_request(dfi->last_readdir);
564bb48bd4dSChengguang Xu dfi->last_readdir = NULL;
565b50c2de5SYan, Zheng
566bb48bd4dSChengguang Xu if (dfi->next_offset > 2) {
567bb48bd4dSChengguang Xu frag = dfi->frag;
5682817b000SSage Weil goto more;
5692817b000SSage Weil }
5702817b000SSage Weil
5712817b000SSage Weil /* more frags? */
572bb48bd4dSChengguang Xu if (!ceph_frag_is_rightmost(dfi->frag)) {
573bb48bd4dSChengguang Xu frag = ceph_frag_next(dfi->frag);
574f3c4ebe6SYan, Zheng if (is_hash_order(ctx->pos)) {
575f3c4ebe6SYan, Zheng loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
576bb48bd4dSChengguang Xu dfi->next_offset, true);
577f3c4ebe6SYan, Zheng if (new_pos > ctx->pos)
578f3c4ebe6SYan, Zheng ctx->pos = new_pos;
579f3c4ebe6SYan, Zheng /* keep last_name */
580f3c4ebe6SYan, Zheng } else {
581bb48bd4dSChengguang Xu ctx->pos = ceph_make_fpos(frag, dfi->next_offset,
582bb48bd4dSChengguang Xu false);
583bb48bd4dSChengguang Xu kfree(dfi->last_name);
584bb48bd4dSChengguang Xu dfi->last_name = NULL;
585f3c4ebe6SYan, Zheng }
5862817b000SSage Weil dout("readdir next frag is %x\n", frag);
5872817b000SSage Weil goto more;
5882817b000SSage Weil }
589bb48bd4dSChengguang Xu dfi->file_info.flags |= CEPH_F_ATEND;
5902817b000SSage Weil
5912817b000SSage Weil /*
5922817b000SSage Weil * if dir_release_count still matches the dir, no dentries
5932817b000SSage Weil * were released during the whole readdir, and we should have
5942817b000SSage Weil * the complete dir contents in our cache.
5952817b000SSage Weil */
596bb48bd4dSChengguang Xu if (atomic64_read(&ci->i_release_count) ==
597bb48bd4dSChengguang Xu dfi->dir_release_count) {
598be655596SSage Weil spin_lock(&ci->i_ceph_lock);
599bb48bd4dSChengguang Xu if (dfi->dir_ordered_count ==
600bb48bd4dSChengguang Xu atomic64_read(&ci->i_ordered_count)) {
60170db4f36SYan, Zheng dout(" marking %p complete and ordered\n", inode);
602fdd4e158SYan, Zheng /* use i_size to track number of entries in
603fdd4e158SYan, Zheng * readdir cache */
604bb48bd4dSChengguang Xu BUG_ON(dfi->readdir_cache_idx < 0);
605bb48bd4dSChengguang Xu i_size_write(inode, dfi->readdir_cache_idx *
606fdd4e158SYan, Zheng sizeof(struct dentry*));
607fdd4e158SYan, Zheng } else {
608a8673d61SYan, Zheng dout(" marking %p complete\n", inode);
609fdd4e158SYan, Zheng }
610bb48bd4dSChengguang Xu __ceph_dir_set_complete(ci, dfi->dir_release_count,
611bb48bd4dSChengguang Xu dfi->dir_ordered_count);
612be655596SSage Weil spin_unlock(&ci->i_ceph_lock);
613fdd4e158SYan, Zheng }
61477acfa29SAl Viro dout("readdir %p file %p done.\n", inode, file);
6152817b000SSage Weil return 0;
6162817b000SSage Weil }
6172817b000SSage Weil
reset_readdir(struct ceph_dir_file_info * dfi)618bb48bd4dSChengguang Xu static void reset_readdir(struct ceph_dir_file_info *dfi)
6192817b000SSage Weil {
620bb48bd4dSChengguang Xu if (dfi->last_readdir) {
621bb48bd4dSChengguang Xu ceph_mdsc_put_request(dfi->last_readdir);
622bb48bd4dSChengguang Xu dfi->last_readdir = NULL;
6232817b000SSage Weil }
624bb48bd4dSChengguang Xu kfree(dfi->last_name);
625bb48bd4dSChengguang Xu dfi->last_name = NULL;
626bb48bd4dSChengguang Xu dfi->dir_release_count = 0;
627bb48bd4dSChengguang Xu dfi->readdir_cache_idx = -1;
628bb48bd4dSChengguang Xu dfi->next_offset = 2; /* compensate for . and .. */
629bb48bd4dSChengguang Xu dfi->file_info.flags &= ~CEPH_F_ATEND;
6302817b000SSage Weil }
6312817b000SSage Weil
6328974eebdSYan, Zheng /*
6338974eebdSYan, Zheng * discard buffered readdir content on seekdir(0), or seek to new frag,
6348974eebdSYan, Zheng * or seek prior to current chunk
6358974eebdSYan, Zheng */
need_reset_readdir(struct ceph_dir_file_info * dfi,loff_t new_pos)636bb48bd4dSChengguang Xu static bool need_reset_readdir(struct ceph_dir_file_info *dfi, loff_t new_pos)
6378974eebdSYan, Zheng {
6388974eebdSYan, Zheng struct ceph_mds_reply_info_parsed *rinfo;
639f3c4ebe6SYan, Zheng loff_t chunk_offset;
6408974eebdSYan, Zheng if (new_pos == 0)
6418974eebdSYan, Zheng return true;
642f3c4ebe6SYan, Zheng if (is_hash_order(new_pos)) {
643f3c4ebe6SYan, Zheng /* no need to reset last_name for a forward seek when
644f3c4ebe6SYan, Zheng * dentries are sotred in hash order */
645bb48bd4dSChengguang Xu } else if (dfi->frag != fpos_frag(new_pos)) {
6468974eebdSYan, Zheng return true;
647f3c4ebe6SYan, Zheng }
648bb48bd4dSChengguang Xu rinfo = dfi->last_readdir ? &dfi->last_readdir->r_reply_info : NULL;
6498974eebdSYan, Zheng if (!rinfo || !rinfo->dir_nr)
6508974eebdSYan, Zheng return true;
651f3c4ebe6SYan, Zheng chunk_offset = rinfo->dir_entries[0].offset;
652f3c4ebe6SYan, Zheng return new_pos < chunk_offset ||
653f3c4ebe6SYan, Zheng is_hash_order(new_pos) != is_hash_order(chunk_offset);
6548974eebdSYan, Zheng }
6558974eebdSYan, Zheng
/*
 * llseek for directories.
 *
 * Only SEEK_SET and SEEK_CUR are handled: SEEK_END returns -EOPNOTSUPP,
 * any other whence and any negative resulting offset return -EINVAL.
 * On success the new position is returned; buffered readdir state is
 * dropped or invalidated first when the new position requires it.
 * The whole operation runs under the inode lock.
 */
static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
{
	struct ceph_dir_file_info *dfi = file->private_data;
	struct inode *inode = file->f_mapping->host;
	loff_t retval = -EINVAL;

	inode_lock(inode);

	if (whence == SEEK_END) {
		retval = -EOPNOTSUPP;
		goto out;
	}
	if (whence == SEEK_CUR)
		offset += file->f_pos;
	else if (whence != SEEK_SET)
		goto out;

	if (offset < 0)
		goto out;

	if (need_reset_readdir(dfi, offset)) {
		dout("dir_llseek dropping %p content\n", file);
		reset_readdir(dfi);
	} else if (is_hash_order(offset) && offset > file->f_pos) {
		/*
		 * for hash offset, we don't know if a forward seek
		 * is within same frag
		 */
		dfi->dir_release_count = 0;
		dfi->readdir_cache_idx = -1;
	}

	if (offset != file->f_pos) {
		file->f_pos = offset;
		file->f_version = 0;
		dfi->file_info.flags &= ~CEPH_F_ATEND;
	}
	retval = offset;
out:
	inode_unlock(inode);
	return retval;
}
6992817b000SSage Weil
7002817b000SSage Weil /*
701468640e3SSage Weil * Handle lookups for the hidden .snap directory.
7022817b000SSage Weil */
ceph_handle_snapdir(struct ceph_mds_request * req,struct dentry * dentry)703aa60cfc3SJeff Layton struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req,
7047a971e2cSJeff Layton struct dentry *dentry)
7052817b000SSage Weil {
706*985b9ee8SXiubo Li struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb);
707810313c5Shongnanli struct inode *parent = d_inode(dentry->d_parent); /* we hold i_rwsem */
7082817b000SSage Weil
7092817b000SSage Weil /* .snap dir? */
7107a971e2cSJeff Layton if (ceph_snap(parent) == CEPH_NOSNAP &&
711aa60cfc3SJeff Layton strcmp(dentry->d_name.name, fsc->mount_options->snapdir_name) == 0) {
712aa60cfc3SJeff Layton struct dentry *res;
7132817b000SSage Weil struct inode *inode = ceph_get_snapdir(parent);
714aa60cfc3SJeff Layton
715aa60cfc3SJeff Layton res = d_splice_alias(inode, dentry);
716aa60cfc3SJeff Layton dout("ENOENT on snapdir %p '%pd', linking to snapdir %p. Spliced dentry %p\n",
717aa60cfc3SJeff Layton dentry, dentry, inode, res);
718aa60cfc3SJeff Layton if (res)
719aa60cfc3SJeff Layton dentry = res;
7202817b000SSage Weil }
721aa60cfc3SJeff Layton return dentry;
722468640e3SSage Weil }
7232817b000SSage Weil
724468640e3SSage Weil /*
725468640e3SSage Weil * Figure out final result of a lookup/open request.
726468640e3SSage Weil *
727468640e3SSage Weil * Mainly, make sure we return the final req->r_dentry (if it already
728468640e3SSage Weil * existed) in place of the original VFS-provided dentry when they
729468640e3SSage Weil * differ.
730468640e3SSage Weil *
731468640e3SSage Weil * Gracefully handle the case where the MDS replies with -ENOENT and
732468640e3SSage Weil * no trace (which it may do, at its discretion, e.g., if it doesn't
733468640e3SSage Weil * care to issue a lease on the negative dentry).
734468640e3SSage Weil */
ceph_finish_lookup(struct ceph_mds_request * req,struct dentry * dentry,int err)735468640e3SSage Weil struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
736468640e3SSage Weil struct dentry *dentry, int err)
737468640e3SSage Weil {
7382817b000SSage Weil if (err == -ENOENT) {
7392817b000SSage Weil /* no trace? */
7402817b000SSage Weil err = 0;
7412817b000SSage Weil if (!req->r_reply_info.head->is_dentry) {
7422817b000SSage Weil dout("ENOENT and no trace, dentry %p inode %p\n",
7432b0143b5SDavid Howells dentry, d_inode(dentry));
7442b0143b5SDavid Howells if (d_really_is_positive(dentry)) {
7452817b000SSage Weil d_drop(dentry);
7462817b000SSage Weil err = -ENOENT;
7472817b000SSage Weil } else {
7482817b000SSage Weil d_add(dentry, NULL);
7492817b000SSage Weil }
7502817b000SSage Weil }
7512817b000SSage Weil }
7522817b000SSage Weil if (err)
7532817b000SSage Weil dentry = ERR_PTR(err);
7542817b000SSage Weil else if (dentry != req->r_dentry)
7552817b000SSage Weil dentry = dget(req->r_dentry); /* we got spliced */
7562817b000SSage Weil else
7572817b000SSage Weil dentry = NULL;
7582817b000SSage Weil return dentry;
7592817b000SSage Weil }
7602817b000SSage Weil
is_root_ceph_dentry(struct inode * inode,struct dentry * dentry)7613b33f692SZhang Zhuoyu static bool is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
7621d1de916SSage Weil {
7631d1de916SSage Weil return ceph_ino(inode) == CEPH_INO_ROOT &&
7641d1de916SSage Weil strncmp(dentry->d_name.name, ".ceph", 5) == 0;
7651d1de916SSage Weil }
7661d1de916SSage Weil
7672817b000SSage Weil /*
7682817b000SSage Weil * Look up a single dir entry. If there is a lookup intent, inform
7692817b000SSage Weil * the MDS so that it gets our 'caps wanted' value in a single op.
7702817b000SSage Weil */
ceph_lookup(struct inode * dir,struct dentry * dentry,unsigned int flags)7712817b000SSage Weil static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
77200cd8dd3SAl Viro unsigned int flags)
7732817b000SSage Weil {
774*985b9ee8SXiubo Li struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb);
7752678da88SXiubo Li struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
7762817b000SSage Weil struct ceph_mds_request *req;
7772817b000SSage Weil int op;
778315f2408SYan, Zheng int mask;
7792817b000SSage Weil int err;
7802817b000SSage Weil
781a455589fSAl Viro dout("lookup %p dentry %p '%pd'\n",
782a455589fSAl Viro dir, dentry, dentry);
7832817b000SSage Weil
7842817b000SSage Weil if (dentry->d_name.len > NAME_MAX)
7852817b000SSage Weil return ERR_PTR(-ENAMETOOLONG);
7862817b000SSage Weil
787cb3524a8SJeff Layton if (IS_ENCRYPTED(dir)) {
788d9ae977dSLuís Henriques bool had_key = fscrypt_has_encryption_key(dir);
789d9ae977dSLuís Henriques
790d9ae977dSLuís Henriques err = fscrypt_prepare_lookup_partial(dir, dentry);
79114e034a6SLuís Henriques if (err < 0)
792cb3524a8SJeff Layton return ERR_PTR(err);
793d9ae977dSLuís Henriques
794d9ae977dSLuís Henriques /* mark directory as incomplete if it has been unlocked */
795d9ae977dSLuís Henriques if (!had_key && fscrypt_has_encryption_key(dir))
796d9ae977dSLuís Henriques ceph_dir_clear_complete(dir);
797cb3524a8SJeff Layton }
798cb3524a8SJeff Layton
7992817b000SSage Weil /* can we conclude ENOENT locally? */
8002b0143b5SDavid Howells if (d_really_is_negative(dentry)) {
8012817b000SSage Weil struct ceph_inode_info *ci = ceph_inode(dir);
8022817b000SSage Weil struct ceph_dentry_info *di = ceph_dentry(dentry);
8032817b000SSage Weil
804be655596SSage Weil spin_lock(&ci->i_ceph_lock);
805891f3f5aSJeff Layton dout(" dir %p flags are 0x%lx\n", dir, ci->i_ceph_flags);
8062817b000SSage Weil if (strncmp(dentry->d_name.name,
8073d14c5d2SYehuda Sadeh fsc->mount_options->snapdir_name,
8082817b000SSage Weil dentry->d_name.len) &&
8091d1de916SSage Weil !is_root_ceph_dentry(dir, dentry) &&
810e2c3de04SYan, Zheng ceph_test_mount_opt(fsc, DCACHE) &&
8112f276c51SYan, Zheng __ceph_dir_is_complete(ci) &&
8121af16d54SXiubo Li __ceph_caps_issued_mask_metric(ci, CEPH_CAP_FILE_SHARED, 1)) {
813719a2514SYan, Zheng __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD);
814be655596SSage Weil spin_unlock(&ci->i_ceph_lock);
8152817b000SSage Weil dout(" dir %p complete, -ENOENT\n", dir);
8162817b000SSage Weil d_add(dentry, NULL);
81797aeb6bfSYan, Zheng di->lease_shared_gen = atomic_read(&ci->i_shared_gen);
8182817b000SSage Weil return NULL;
8192817b000SSage Weil }
820be655596SSage Weil spin_unlock(&ci->i_ceph_lock);
8212817b000SSage Weil }
8222817b000SSage Weil
8232817b000SSage Weil op = ceph_snap(dir) == CEPH_SNAPDIR ?
8242817b000SSage Weil CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
8252817b000SSage Weil req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
8262817b000SSage Weil if (IS_ERR(req))
8277e34bc52SJulia Lawall return ERR_CAST(req);
8282817b000SSage Weil req->r_dentry = dget(dentry);
8292817b000SSage Weil req->r_num_caps = 2;
830315f2408SYan, Zheng
831315f2408SYan, Zheng mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
832315f2408SYan, Zheng if (ceph_security_xattr_wanted(dir))
833315f2408SYan, Zheng mask |= CEPH_CAP_XATTR_SHARED;
834315f2408SYan, Zheng req->r_args.getattr.mask = cpu_to_le32(mask);
835315f2408SYan, Zheng
8364c183472SJeff Layton ihold(dir);
8373dd69aabSJeff Layton req->r_parent = dir;
8383dd69aabSJeff Layton set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
8392817b000SSage Weil err = ceph_mdsc_do_request(mdsc, NULL, req);
8407a971e2cSJeff Layton if (err == -ENOENT) {
8417a971e2cSJeff Layton struct dentry *res;
8427a971e2cSJeff Layton
8437a971e2cSJeff Layton res = ceph_handle_snapdir(req, dentry);
844aa60cfc3SJeff Layton if (IS_ERR(res)) {
845aa60cfc3SJeff Layton err = PTR_ERR(res);
846aa60cfc3SJeff Layton } else {
847aa60cfc3SJeff Layton dentry = res;
848aa60cfc3SJeff Layton err = 0;
849aa60cfc3SJeff Layton }
8507a971e2cSJeff Layton }
8512817b000SSage Weil dentry = ceph_finish_lookup(req, dentry, err);
8522817b000SSage Weil ceph_mdsc_put_request(req); /* will dput(dentry) */
8532817b000SSage Weil dout("lookup result=%p\n", dentry);
8542817b000SSage Weil return dentry;
8552817b000SSage Weil }
8562817b000SSage Weil
8572817b000SSage Weil /*
8582817b000SSage Weil * If we do a create but get no trace back from the MDS, follow up with
8592817b000SSage Weil * a lookup (the VFS expects us to link up the provided dentry).
8602817b000SSage Weil */
ceph_handle_notrace_create(struct inode * dir,struct dentry * dentry)8612817b000SSage Weil int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
8622817b000SSage Weil {
86300cd8dd3SAl Viro struct dentry *result = ceph_lookup(dir, dentry, 0);
8642817b000SSage Weil
8652817b000SSage Weil if (result && !IS_ERR(result)) {
8662817b000SSage Weil /*
8672817b000SSage Weil * We created the item, then did a lookup, and found
8682817b000SSage Weil * it was already linked to another inode we already
8694d41cef2SYan, Zheng * had in our cache (and thus got spliced). To not
8704d41cef2SYan, Zheng * confuse VFS (especially when inode is a directory),
8714d41cef2SYan, Zheng * we don't link our dentry to that inode, return an
8724d41cef2SYan, Zheng * error instead.
8734d41cef2SYan, Zheng *
8744d41cef2SYan, Zheng * This event should be rare and it happens only when
8754d41cef2SYan, Zheng * we talk to old MDS. Recent MDS does not send traceless
8764d41cef2SYan, Zheng * reply for request that creates new inode.
8772817b000SSage Weil */
8785cba372cSYan, Zheng d_drop(result);
8794d41cef2SYan, Zheng return -ESTALE;
8802817b000SSage Weil }
8812817b000SSage Weil return PTR_ERR(result);
8822817b000SSage Weil }
8832817b000SSage Weil
/*
 * Create a new node named @dentry in @dir by sending a MKNOD request to
 * the auth MDS.
 *
 * Fails with -EROFS when @dir is not a live (CEPH_NOSNAP) directory and
 * with -EDQUOT when the max-files quota is exceeded; a conflicting unlink
 * is waited for before anything else is done.  If the MDS replies without
 * a dentry trace, a follow-up lookup links up @dentry.
 *
 * On success the ACLs of the new inode are initialised from the security
 * context; on any failure after the conflict wait, @dentry is unhashed.
 * Returns 0 or a negative errno.
 */
static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
		      struct dentry *dentry, umode_t mode, dev_t rdev)
{
	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
	struct ceph_acl_sec_ctx as_ctx = {};
	struct ceph_mds_request *req;
	struct inode *new_inode;
	int err;

	if (ceph_snap(dir) != CEPH_NOSNAP)
		return -EROFS;

	err = ceph_wait_on_conflict_unlink(dentry);
	if (err)
		return err;

	if (ceph_quota_is_max_files_exceeded(dir)) {
		err = -EDQUOT;
		goto out;
	}

	dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
	     dir, dentry, mode, rdev);
	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out;
	}

	new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
	if (IS_ERR(new_inode)) {
		/* never leave an error pointer behind in the request */
		req->r_new_inode = NULL;
		err = PTR_ERR(new_inode);
		goto out_req;
	}
	req->r_new_inode = new_inode;

	if (S_ISREG(mode) && IS_ENCRYPTED(dir))
		set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);

	req->r_dentry = dget(dentry);
	req->r_num_caps = 2;
	req->r_parent = dir;
	ihold(dir);
	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
	req->r_args.mknod.mode = cpu_to_le32(mode);
	req->r_args.mknod.rdev = cpu_to_le32(rdev);
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
			     CEPH_CAP_XATTR_EXCL;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;

	ceph_as_ctx_to_req(req, &as_ctx);

	err = ceph_mdsc_do_request(mdsc, dir, req);
	/* no trace in the reply: look the new entry up ourselves */
	if (!err && !req->r_reply_info.head->is_dentry)
		err = ceph_handle_notrace_create(dir, dentry);
out_req:
	ceph_mdsc_put_request(req);
out:
	if (err)
		d_drop(dentry);
	else
		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
	ceph_release_acl_sec_ctx(&as_ctx);
	return err;
}
9482817b000SSage Weil
/*
 * Create entry point: delegates to ceph_mknod() with an rdev of 0.
 * @excl is not used.
 */
static int ceph_create(struct mnt_idmap *idmap, struct inode *dir,
		       struct dentry *dentry, umode_t mode, bool excl)
{
	const dev_t no_rdev = 0;

	return ceph_mknod(idmap, dir, dentry, mode, no_rdev);
}
9542817b000SSage Weil
95579f2f6adSJeff Layton #if IS_ENABLED(CONFIG_FS_ENCRYPTION)
prep_encrypted_symlink_target(struct ceph_mds_request * req,const char * dest)95679f2f6adSJeff Layton static int prep_encrypted_symlink_target(struct ceph_mds_request *req,
95779f2f6adSJeff Layton const char *dest)
95879f2f6adSJeff Layton {
95979f2f6adSJeff Layton int err;
96079f2f6adSJeff Layton int len = strlen(dest);
96179f2f6adSJeff Layton struct fscrypt_str osd_link = FSTR_INIT(NULL, 0);
96279f2f6adSJeff Layton
96379f2f6adSJeff Layton err = fscrypt_prepare_symlink(req->r_parent, dest, len, PATH_MAX,
96479f2f6adSJeff Layton &osd_link);
96579f2f6adSJeff Layton if (err)
96679f2f6adSJeff Layton goto out;
96779f2f6adSJeff Layton
96879f2f6adSJeff Layton err = fscrypt_encrypt_symlink(req->r_new_inode, dest, len, &osd_link);
96979f2f6adSJeff Layton if (err)
97079f2f6adSJeff Layton goto out;
97179f2f6adSJeff Layton
97279f2f6adSJeff Layton req->r_path2 = kmalloc(CEPH_BASE64_CHARS(osd_link.len) + 1, GFP_KERNEL);
97379f2f6adSJeff Layton if (!req->r_path2) {
97479f2f6adSJeff Layton err = -ENOMEM;
97579f2f6adSJeff Layton goto out;
97679f2f6adSJeff Layton }
97779f2f6adSJeff Layton
97879f2f6adSJeff Layton len = ceph_base64_encode(osd_link.name, osd_link.len, req->r_path2);
97979f2f6adSJeff Layton req->r_path2[len] = '\0';
98079f2f6adSJeff Layton out:
98179f2f6adSJeff Layton fscrypt_fname_free_buffer(&osd_link);
98279f2f6adSJeff Layton return err;
98379f2f6adSJeff Layton }
98479f2f6adSJeff Layton #else
prep_encrypted_symlink_target(struct ceph_mds_request * req,const char * dest)98579f2f6adSJeff Layton static int prep_encrypted_symlink_target(struct ceph_mds_request *req,
98679f2f6adSJeff Layton const char *dest)
98779f2f6adSJeff Layton {
98879f2f6adSJeff Layton return -EOPNOTSUPP;
98979f2f6adSJeff Layton }
99079f2f6adSJeff Layton #endif
99179f2f6adSJeff Layton
ceph_symlink(struct mnt_idmap * idmap,struct inode * dir,struct dentry * dentry,const char * dest)9927a77db95SChristian Brauner static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir,
993549c7297SChristian Brauner struct dentry *dentry, const char *dest)
9942817b000SSage Weil {
9952678da88SXiubo Li struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
9962817b000SSage Weil struct ceph_mds_request *req;
997ac6713ccSYan, Zheng struct ceph_acl_sec_ctx as_ctx = {};
998ec9595c0SJeff Layton umode_t mode = S_IFLNK | 0777;
9992817b000SSage Weil int err;
10002817b000SSage Weil
10012817b000SSage Weil if (ceph_snap(dir) != CEPH_NOSNAP)
10022817b000SSage Weil return -EROFS;
10032817b000SSage Weil
10044868e537SXiubo Li err = ceph_wait_on_conflict_unlink(dentry);
10054868e537SXiubo Li if (err)
10064868e537SXiubo Li return err;
10074868e537SXiubo Li
100867fcd151SChengguang Xu if (ceph_quota_is_max_files_exceeded(dir)) {
100967fcd151SChengguang Xu err = -EDQUOT;
101067fcd151SChengguang Xu goto out;
101167fcd151SChengguang Xu }
1012b7a29217SLuis Henriques
10132817b000SSage Weil dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
10142817b000SSage Weil req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
10152817b000SSage Weil if (IS_ERR(req)) {
1016b1ee94aaSYan, Zheng err = PTR_ERR(req);
1017b1ee94aaSYan, Zheng goto out;
10182817b000SSage Weil }
1019ec9595c0SJeff Layton
1020ec9595c0SJeff Layton req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
1021ec9595c0SJeff Layton if (IS_ERR(req->r_new_inode)) {
1022ec9595c0SJeff Layton err = PTR_ERR(req->r_new_inode);
1023ec9595c0SJeff Layton req->r_new_inode = NULL;
1024ec9595c0SJeff Layton goto out_req;
1025ec9595c0SJeff Layton }
1026ec9595c0SJeff Layton
102779f2f6adSJeff Layton req->r_parent = dir;
102879f2f6adSJeff Layton ihold(dir);
102979f2f6adSJeff Layton
103079f2f6adSJeff Layton if (IS_ENCRYPTED(req->r_new_inode)) {
103179f2f6adSJeff Layton err = prep_encrypted_symlink_target(req, dest);
103279f2f6adSJeff Layton if (err)
103379f2f6adSJeff Layton goto out_req;
103479f2f6adSJeff Layton } else {
1035687265e5SYan, Zheng req->r_path2 = kstrdup(dest, GFP_KERNEL);
1036a149bb9aSSanidhya Kashyap if (!req->r_path2) {
1037a149bb9aSSanidhya Kashyap err = -ENOMEM;
1038ec9595c0SJeff Layton goto out_req;
1039a149bb9aSSanidhya Kashyap }
104079f2f6adSJeff Layton }
10414c183472SJeff Layton
10423dd69aabSJeff Layton set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
10432817b000SSage Weil req->r_dentry = dget(dentry);
10442817b000SSage Weil req->r_num_caps = 2;
1045d9d00f71SXiubo Li req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
1046d9d00f71SXiubo Li CEPH_CAP_XATTR_EXCL;
10472817b000SSage Weil req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
1048ec9595c0SJeff Layton
1049ec9595c0SJeff Layton ceph_as_ctx_to_req(req, &as_ctx);
1050ec9595c0SJeff Layton
10512817b000SSage Weil err = ceph_mdsc_do_request(mdsc, dir, req);
10522817b000SSage Weil if (!err && !req->r_reply_info.head->is_dentry)
10532817b000SSage Weil err = ceph_handle_notrace_create(dir, dentry);
1054ec9595c0SJeff Layton out_req:
10552817b000SSage Weil ceph_mdsc_put_request(req);
1056b1ee94aaSYan, Zheng out:
1057b1ee94aaSYan, Zheng if (err)
10582817b000SSage Weil d_drop(dentry);
1059ac6713ccSYan, Zheng ceph_release_acl_sec_ctx(&as_ctx);
10602817b000SSage Weil return err;
10612817b000SSage Weil }
10622817b000SSage Weil
/*
 * Create a directory named @dentry in @dir, or a snapshot when @dir is the
 * snapdir (mkdir .snap/foo is a MKSNAP).
 *
 * Errors before the request is built: -EROFS when @dir is inside a
 * snapshot, -EDQUOT when a plain mkdir would exceed the max-files quota,
 * -ENOKEY when a snapshot is requested in an encrypted directory whose key
 * is not available.  If the MDS reply carries neither target nor dentry
 * trace, a follow-up lookup links up @dentry.
 *
 * On success the new inode's ACLs are initialised from the security
 * context; on any failure after the conflicting-unlink wait, @dentry is
 * unhashed.  Returns 0 or a negative errno.
 */
static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
		      struct dentry *dentry, umode_t mode)
{
	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
	struct ceph_acl_sec_ctx as_ctx = {};
	struct ceph_mds_request *req;
	struct inode *new_inode;
	int err;
	int op;

	err = ceph_wait_on_conflict_unlink(dentry);
	if (err)
		return err;

	if (ceph_snap(dir) == CEPH_SNAPDIR) {
		/* mkdir .snap/foo is a MKSNAP */
		op = CEPH_MDS_OP_MKSNAP;
		dout("mksnap dir %p snap '%pd' dn %p\n", dir,
		     dentry, dentry);
		if (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir)) {
			err = -ENOKEY;
			goto out;
		}
	} else if (ceph_snap(dir) == CEPH_NOSNAP) {
		dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
		op = CEPH_MDS_OP_MKDIR;
		if (ceph_quota_is_max_files_exceeded(dir)) {
			err = -EDQUOT;
			goto out;
		}
	} else {
		err = -EROFS;
		goto out;
	}

	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out;
	}

	mode |= S_IFDIR;
	new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
	if (IS_ERR(new_inode)) {
		/* never leave an error pointer behind in the request */
		req->r_new_inode = NULL;
		err = PTR_ERR(new_inode);
		goto out_req;
	}
	req->r_new_inode = new_inode;

	req->r_dentry = dget(dentry);
	req->r_num_caps = 2;
	req->r_parent = dir;
	ihold(dir);
	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
	req->r_args.mkdir.mode = cpu_to_le32(mode);
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
			     CEPH_CAP_XATTR_EXCL;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;

	ceph_as_ctx_to_req(req, &as_ctx);

	err = ceph_mdsc_do_request(mdsc, dir, req);
	/* reply carried neither target nor dentry: look it up ourselves */
	if (!err &&
	    !req->r_reply_info.head->is_target &&
	    !req->r_reply_info.head->is_dentry)
		err = ceph_handle_notrace_create(dir, dentry);
out_req:
	ceph_mdsc_put_request(req);
out:
	if (err)
		d_drop(dentry);
	else
		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
	ceph_release_acl_sec_ctx(&as_ctx);
	return err;
}
11422817b000SSage Weil
ceph_link(struct dentry * old_dentry,struct inode * dir,struct dentry * dentry)11432817b000SSage Weil static int ceph_link(struct dentry *old_dentry, struct inode *dir,
11442817b000SSage Weil struct dentry *dentry)
11452817b000SSage Weil {
11462678da88SXiubo Li struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
11472817b000SSage Weil struct ceph_mds_request *req;
11482817b000SSage Weil int err;
11492817b000SSage Weil
1150a5ffd7b6SXiubo Li if (dentry->d_flags & DCACHE_DISCONNECTED)
1151a5ffd7b6SXiubo Li return -EINVAL;
1152a5ffd7b6SXiubo Li
11534868e537SXiubo Li err = ceph_wait_on_conflict_unlink(dentry);
11544868e537SXiubo Li if (err)
11554868e537SXiubo Li return err;
11564868e537SXiubo Li
11572817b000SSage Weil if (ceph_snap(dir) != CEPH_NOSNAP)
11582817b000SSage Weil return -EROFS;
11592817b000SSage Weil
116094af0470SJeff Layton err = fscrypt_prepare_link(old_dentry, dir, dentry);
116194af0470SJeff Layton if (err)
116294af0470SJeff Layton return err;
116394af0470SJeff Layton
1164a5ffd7b6SXiubo Li dout("link in dir %p %llx.%llx old_dentry %p:'%pd' dentry %p:'%pd'\n",
1165a5ffd7b6SXiubo Li dir, ceph_vinop(dir), old_dentry, old_dentry, dentry, dentry);
11662817b000SSage Weil req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
11672817b000SSage Weil if (IS_ERR(req)) {
11682817b000SSage Weil d_drop(dentry);
11692817b000SSage Weil return PTR_ERR(req);
11702817b000SSage Weil }
11712817b000SSage Weil req->r_dentry = dget(dentry);
11722817b000SSage Weil req->r_num_caps = 2;
11734b58c9b1SSage Weil req->r_old_dentry = dget(old_dentry);
1174a5ffd7b6SXiubo Li /*
1175a5ffd7b6SXiubo Li * The old_dentry maybe a DCACHE_DISCONNECTED dentry, then we
1176a5ffd7b6SXiubo Li * will just pass the ino# to MDSs.
1177a5ffd7b6SXiubo Li */
1178a5ffd7b6SXiubo Li if (old_dentry->d_flags & DCACHE_DISCONNECTED)
1179a5ffd7b6SXiubo Li req->r_ino2 = ceph_vino(d_inode(old_dentry));
11803dd69aabSJeff Layton req->r_parent = dir;
11814c183472SJeff Layton ihold(dir);
11823dd69aabSJeff Layton set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
1183d9d00f71SXiubo Li req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
11842817b000SSage Weil req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
1185ad88f23fSYan, Zheng /* release LINK_SHARED on source inode (mds will lock it) */
1186d19a0b54SYan, Zheng req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
11872817b000SSage Weil err = ceph_mdsc_do_request(mdsc, dir, req);
118870b666c3SSage Weil if (err) {
11892817b000SSage Weil d_drop(dentry);
119070b666c3SSage Weil } else if (!req->r_reply_info.head->is_dentry) {
11912b0143b5SDavid Howells ihold(d_inode(old_dentry));
11922b0143b5SDavid Howells d_instantiate(dentry, d_inode(old_dentry));
119370b666c3SSage Weil }
11942817b000SSage Weil ceph_mdsc_put_request(req);
11952817b000SSage Weil return err;
11962817b000SSage Weil }
11972817b000SSage Weil
ceph_async_unlink_cb(struct ceph_mds_client * mdsc,struct ceph_mds_request * req)11982ccb4546SJeff Layton static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
11992ccb4546SJeff Layton struct ceph_mds_request *req)
12002ccb4546SJeff Layton {
12014868e537SXiubo Li struct dentry *dentry = req->r_dentry;
1202*985b9ee8SXiubo Li struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb);
12034868e537SXiubo Li struct ceph_dentry_info *di = ceph_dentry(dentry);
12042ccb4546SJeff Layton int result = req->r_err ? req->r_err :
12052ccb4546SJeff Layton le32_to_cpu(req->r_reply_info.head->result);
12062ccb4546SJeff Layton
12074868e537SXiubo Li if (!test_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags))
12084868e537SXiubo Li pr_warn("%s dentry %p:%pd async unlink bit is not set\n",
12094868e537SXiubo Li __func__, dentry, dentry);
12104868e537SXiubo Li
12114868e537SXiubo Li spin_lock(&fsc->async_unlink_conflict_lock);
12124868e537SXiubo Li hash_del_rcu(&di->hnode);
12134868e537SXiubo Li spin_unlock(&fsc->async_unlink_conflict_lock);
12144868e537SXiubo Li
12154868e537SXiubo Li spin_lock(&dentry->d_lock);
12164868e537SXiubo Li di->flags &= ~CEPH_DENTRY_ASYNC_UNLINK;
12174868e537SXiubo Li wake_up_bit(&di->flags, CEPH_DENTRY_ASYNC_UNLINK_BIT);
12184868e537SXiubo Li spin_unlock(&dentry->d_lock);
12194868e537SXiubo Li
12204868e537SXiubo Li synchronize_rcu();
12214868e537SXiubo Li
12222ccb4546SJeff Layton if (result == -EJUKEBOX)
12232ccb4546SJeff Layton goto out;
12242ccb4546SJeff Layton
12252ccb4546SJeff Layton /* If op failed, mark everyone involved for errors */
12262ccb4546SJeff Layton if (result) {
12272a575f13SJeff Layton int pathlen = 0;
12282a575f13SJeff Layton u64 base = 0;
12292e2023e9SXiubo Li char *path = ceph_mdsc_build_path(mdsc, dentry, &pathlen,
12302ccb4546SJeff Layton &base, 0);
12312ccb4546SJeff Layton
12322ccb4546SJeff Layton /* mark error on parent + clear complete */
12332ccb4546SJeff Layton mapping_set_error(req->r_parent->i_mapping, result);
12342ccb4546SJeff Layton ceph_dir_clear_complete(req->r_parent);
12352ccb4546SJeff Layton
12362ccb4546SJeff Layton /* drop the dentry -- we don't know its status */
12374868e537SXiubo Li if (!d_unhashed(dentry))
12384868e537SXiubo Li d_drop(dentry);
12392ccb4546SJeff Layton
12402ccb4546SJeff Layton /* mark inode itself for an error (since metadata is bogus) */
12412ccb4546SJeff Layton mapping_set_error(req->r_old_inode->i_mapping, result);
12422ccb4546SJeff Layton
12434868e537SXiubo Li pr_warn("async unlink failure path=(%llx)%s result=%d!\n",
12442ccb4546SJeff Layton base, IS_ERR(path) ? "<<bad>>" : path, result);
12452ccb4546SJeff Layton ceph_mdsc_free_path(path, pathlen);
12462ccb4546SJeff Layton }
12472ccb4546SJeff Layton out:
12482ccb4546SJeff Layton iput(req->r_old_inode);
12492ccb4546SJeff Layton ceph_mdsc_release_dir_caps(req);
12502ccb4546SJeff Layton }
12512ccb4546SJeff Layton
get_caps_for_async_unlink(struct inode * dir,struct dentry * dentry)12522ccb4546SJeff Layton static int get_caps_for_async_unlink(struct inode *dir, struct dentry *dentry)
12532ccb4546SJeff Layton {
12542ccb4546SJeff Layton struct ceph_inode_info *ci = ceph_inode(dir);
12552ccb4546SJeff Layton struct ceph_dentry_info *di;
12562ccb4546SJeff Layton int got = 0, want = CEPH_CAP_FILE_EXCL | CEPH_CAP_DIR_UNLINK;
12572ccb4546SJeff Layton
12582ccb4546SJeff Layton spin_lock(&ci->i_ceph_lock);
12592ccb4546SJeff Layton if ((__ceph_caps_issued(ci, NULL) & want) == want) {
12602ccb4546SJeff Layton ceph_take_cap_refs(ci, want, false);
12612ccb4546SJeff Layton got = want;
12622ccb4546SJeff Layton }
12632ccb4546SJeff Layton spin_unlock(&ci->i_ceph_lock);
12642ccb4546SJeff Layton
12652ccb4546SJeff Layton /* If we didn't get anything, return 0 */
12662ccb4546SJeff Layton if (!got)
12672ccb4546SJeff Layton return 0;
12682ccb4546SJeff Layton
12692ccb4546SJeff Layton spin_lock(&dentry->d_lock);
12702ccb4546SJeff Layton di = ceph_dentry(dentry);
12712ccb4546SJeff Layton /*
12722ccb4546SJeff Layton * - We are holding Fx, which implies Fs caps.
12732ccb4546SJeff Layton * - Only support async unlink for primary linkage
12742ccb4546SJeff Layton */
12752ccb4546SJeff Layton if (atomic_read(&ci->i_shared_gen) != di->lease_shared_gen ||
12762ccb4546SJeff Layton !(di->flags & CEPH_DENTRY_PRIMARY_LINK))
12772ccb4546SJeff Layton want = 0;
12782ccb4546SJeff Layton spin_unlock(&dentry->d_lock);
12792ccb4546SJeff Layton
12802ccb4546SJeff Layton /* Do we still want what we've got? */
12812ccb4546SJeff Layton if (want == got)
12822ccb4546SJeff Layton return got;
12832ccb4546SJeff Layton
12842ccb4546SJeff Layton ceph_put_cap_refs(ci, got);
12852ccb4546SJeff Layton return 0;
12862ccb4546SJeff Layton }
12872ccb4546SJeff Layton
12882817b000SSage Weil /*
12892817b000SSage Weil * rmdir and unlink are differ only by the metadata op code
12902817b000SSage Weil */
ceph_unlink(struct inode * dir,struct dentry * dentry)12912817b000SSage Weil static int ceph_unlink(struct inode *dir, struct dentry *dentry)
12922817b000SSage Weil {
1293*985b9ee8SXiubo Li struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb);
12943d14c5d2SYehuda Sadeh struct ceph_mds_client *mdsc = fsc->mdsc;
12952b0143b5SDavid Howells struct inode *inode = d_inode(dentry);
12962817b000SSage Weil struct ceph_mds_request *req;
12972ccb4546SJeff Layton bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
12982817b000SSage Weil int err = -EROFS;
12992817b000SSage Weil int op;
13002817b000SSage Weil
13012817b000SSage Weil if (ceph_snap(dir) == CEPH_SNAPDIR) {
13022817b000SSage Weil /* rmdir .snap/foo is RMSNAP */
1303a455589fSAl Viro dout("rmsnap dir %p '%pd' dn %p\n", dir, dentry, dentry);
13042817b000SSage Weil op = CEPH_MDS_OP_RMSNAP;
13052817b000SSage Weil } else if (ceph_snap(dir) == CEPH_NOSNAP) {
13062817b000SSage Weil dout("unlink/rmdir dir %p dn %p inode %p\n",
13072817b000SSage Weil dir, dentry, inode);
1308e36cb0b8SDavid Howells op = d_is_dir(dentry) ?
13092817b000SSage Weil CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
13102817b000SSage Weil } else
13112817b000SSage Weil goto out;
13122ccb4546SJeff Layton retry:
13132817b000SSage Weil req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
13142817b000SSage Weil if (IS_ERR(req)) {
13152817b000SSage Weil err = PTR_ERR(req);
13162817b000SSage Weil goto out;
13172817b000SSage Weil }
13182817b000SSage Weil req->r_dentry = dget(dentry);
13192817b000SSage Weil req->r_num_caps = 2;
13203dd69aabSJeff Layton req->r_parent = dir;
13214c183472SJeff Layton ihold(dir);
1322d9d00f71SXiubo Li req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
13232817b000SSage Weil req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
13246ef0bc6dSZhi Zhang req->r_inode_drop = ceph_drop_caps_for_unlink(inode);
13252ccb4546SJeff Layton
13262ccb4546SJeff Layton if (try_async && op == CEPH_MDS_OP_UNLINK &&
13272ccb4546SJeff Layton (req->r_dir_caps = get_caps_for_async_unlink(dir, dentry))) {
13284868e537SXiubo Li struct ceph_dentry_info *di = ceph_dentry(dentry);
13294868e537SXiubo Li
1330ebce3eb2SJeff Layton dout("async unlink on %llu/%.*s caps=%s", ceph_ino(dir),
13312ccb4546SJeff Layton dentry->d_name.len, dentry->d_name.name,
13322ccb4546SJeff Layton ceph_cap_string(req->r_dir_caps));
13332ccb4546SJeff Layton set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags);
13342ccb4546SJeff Layton req->r_callback = ceph_async_unlink_cb;
13352ccb4546SJeff Layton req->r_old_inode = d_inode(dentry);
13362ccb4546SJeff Layton ihold(req->r_old_inode);
13374868e537SXiubo Li
13384868e537SXiubo Li spin_lock(&dentry->d_lock);
13394868e537SXiubo Li di->flags |= CEPH_DENTRY_ASYNC_UNLINK;
13404868e537SXiubo Li spin_unlock(&dentry->d_lock);
13414868e537SXiubo Li
13424868e537SXiubo Li spin_lock(&fsc->async_unlink_conflict_lock);
13434868e537SXiubo Li hash_add_rcu(fsc->async_unlink_conflict, &di->hnode,
13444868e537SXiubo Li dentry->d_name.hash);
13454868e537SXiubo Li spin_unlock(&fsc->async_unlink_conflict_lock);
13464868e537SXiubo Li
13472ccb4546SJeff Layton err = ceph_mdsc_submit_request(mdsc, dir, req);
13482ccb4546SJeff Layton if (!err) {
13492ccb4546SJeff Layton /*
13502ccb4546SJeff Layton * We have enough caps, so we assume that the unlink
13512ccb4546SJeff Layton * will succeed. Fix up the target inode and dcache.
13522ccb4546SJeff Layton */
13532ccb4546SJeff Layton drop_nlink(inode);
13542ccb4546SJeff Layton d_delete(dentry);
13554868e537SXiubo Li } else {
13564868e537SXiubo Li spin_lock(&fsc->async_unlink_conflict_lock);
13574868e537SXiubo Li hash_del_rcu(&di->hnode);
13584868e537SXiubo Li spin_unlock(&fsc->async_unlink_conflict_lock);
13594868e537SXiubo Li
13604868e537SXiubo Li spin_lock(&dentry->d_lock);
13614868e537SXiubo Li di->flags &= ~CEPH_DENTRY_ASYNC_UNLINK;
13624868e537SXiubo Li spin_unlock(&dentry->d_lock);
13634868e537SXiubo Li
13644868e537SXiubo Li if (err == -EJUKEBOX) {
13652ccb4546SJeff Layton try_async = false;
13662ccb4546SJeff Layton ceph_mdsc_put_request(req);
13672ccb4546SJeff Layton goto retry;
13682ccb4546SJeff Layton }
13694868e537SXiubo Li }
13702ccb4546SJeff Layton } else {
13712ccb4546SJeff Layton set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
13722817b000SSage Weil err = ceph_mdsc_do_request(mdsc, dir, req);
13732817b000SSage Weil if (!err && !req->r_reply_info.head->is_dentry)
13742817b000SSage Weil d_delete(dentry);
13752ccb4546SJeff Layton }
13762ccb4546SJeff Layton
13772817b000SSage Weil ceph_mdsc_put_request(req);
13782817b000SSage Weil out:
13792817b000SSage Weil return err;
13802817b000SSage Weil }
13812817b000SSage Weil
ceph_rename(struct mnt_idmap * idmap,struct inode * old_dir,struct dentry * old_dentry,struct inode * new_dir,struct dentry * new_dentry,unsigned int flags)1382e18275aeSChristian Brauner static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir,
1383549c7297SChristian Brauner struct dentry *old_dentry, struct inode *new_dir,
1384549c7297SChristian Brauner struct dentry *new_dentry, unsigned int flags)
13852817b000SSage Weil {
13862678da88SXiubo Li struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old_dir->i_sb);
13872817b000SSage Weil struct ceph_mds_request *req;
13880ea611a3SYan, Zheng int op = CEPH_MDS_OP_RENAME;
13892817b000SSage Weil int err;
13902817b000SSage Weil
13911cd66c93SMiklos Szeredi if (flags)
13921cd66c93SMiklos Szeredi return -EINVAL;
13931cd66c93SMiklos Szeredi
13942817b000SSage Weil if (ceph_snap(old_dir) != ceph_snap(new_dir))
13952817b000SSage Weil return -EXDEV;
13960ea611a3SYan, Zheng if (ceph_snap(old_dir) != CEPH_NOSNAP) {
13970ea611a3SYan, Zheng if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR)
13980ea611a3SYan, Zheng op = CEPH_MDS_OP_RENAMESNAP;
13990ea611a3SYan, Zheng else
14002817b000SSage Weil return -EROFS;
14010ea611a3SYan, Zheng }
14026646ea1cSLuis Henriques /* don't allow cross-quota renames */
14036646ea1cSLuis Henriques if ((old_dir != new_dir) &&
14046646ea1cSLuis Henriques (!ceph_quota_is_same_realm(old_dir, new_dir)))
14056646ea1cSLuis Henriques return -EXDEV;
1406cafe21a4SLuis Henriques
14074868e537SXiubo Li err = ceph_wait_on_conflict_unlink(new_dentry);
14084868e537SXiubo Li if (err)
14094868e537SXiubo Li return err;
14104868e537SXiubo Li
141194af0470SJeff Layton err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry,
141294af0470SJeff Layton flags);
141394af0470SJeff Layton if (err)
141494af0470SJeff Layton return err;
141594af0470SJeff Layton
14162817b000SSage Weil dout("rename dir %p dentry %p to dir %p dentry %p\n",
14172817b000SSage Weil old_dir, old_dentry, new_dir, new_dentry);
14180ea611a3SYan, Zheng req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
14192817b000SSage Weil if (IS_ERR(req))
14202817b000SSage Weil return PTR_ERR(req);
1421180061a5SSage Weil ihold(old_dir);
14222817b000SSage Weil req->r_dentry = dget(new_dentry);
14232817b000SSage Weil req->r_num_caps = 2;
14242817b000SSage Weil req->r_old_dentry = dget(old_dentry);
1425180061a5SSage Weil req->r_old_dentry_dir = old_dir;
14263dd69aabSJeff Layton req->r_parent = new_dir;
14274c183472SJeff Layton ihold(new_dir);
14283dd69aabSJeff Layton set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
1429d9d00f71SXiubo Li req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
14302817b000SSage Weil req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
1431d9d00f71SXiubo Li req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
14322817b000SSage Weil req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
14332817b000SSage Weil /* release LINK_RDCACHE on source inode (mds will lock it) */
1434d19a0b54SYan, Zheng req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
14356ef0bc6dSZhi Zhang if (d_really_is_positive(new_dentry)) {
14366ef0bc6dSZhi Zhang req->r_inode_drop =
14376ef0bc6dSZhi Zhang ceph_drop_caps_for_unlink(d_inode(new_dentry));
14386ef0bc6dSZhi Zhang }
14392817b000SSage Weil err = ceph_mdsc_do_request(mdsc, old_dir, req);
14402817b000SSage Weil if (!err && !req->r_reply_info.head->is_dentry) {
14412817b000SSage Weil /*
14422817b000SSage Weil * Normally d_move() is done by fill_trace (called by
14432817b000SSage Weil * do_request, above). If there is no trace, we need
14442817b000SSage Weil * to do it here.
14452817b000SSage Weil */
14462817b000SSage Weil d_move(old_dentry, new_dentry);
14472817b000SSage Weil }
14482817b000SSage Weil ceph_mdsc_put_request(req);
14492817b000SSage Weil return err;
14502817b000SSage Weil }
14512817b000SSage Weil
145281a6cf2dSSage Weil /*
145337c4efc1SYan, Zheng * Move dentry to tail of mdsc->dentry_leases list when lease is updated.
145437c4efc1SYan, Zheng * Leases at front of the list will expire first. (Assume all leases have
145537c4efc1SYan, Zheng * similar duration)
145637c4efc1SYan, Zheng *
145737c4efc1SYan, Zheng * Called under dentry->d_lock.
145837c4efc1SYan, Zheng */
__ceph_dentry_lease_touch(struct ceph_dentry_info * di)145937c4efc1SYan, Zheng void __ceph_dentry_lease_touch(struct ceph_dentry_info *di)
146037c4efc1SYan, Zheng {
146137c4efc1SYan, Zheng struct dentry *dn = di->dentry;
146237c4efc1SYan, Zheng struct ceph_mds_client *mdsc;
146337c4efc1SYan, Zheng
146437c4efc1SYan, Zheng dout("dentry_lease_touch %p %p '%pd'\n", di, dn, dn);
146537c4efc1SYan, Zheng
146637c4efc1SYan, Zheng di->flags |= CEPH_DENTRY_LEASE_LIST;
146737c4efc1SYan, Zheng if (di->flags & CEPH_DENTRY_SHRINK_LIST) {
146837c4efc1SYan, Zheng di->flags |= CEPH_DENTRY_REFERENCED;
146937c4efc1SYan, Zheng return;
147037c4efc1SYan, Zheng }
147137c4efc1SYan, Zheng
1472*985b9ee8SXiubo Li mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc;
147337c4efc1SYan, Zheng spin_lock(&mdsc->dentry_list_lock);
147437c4efc1SYan, Zheng list_move_tail(&di->lease_list, &mdsc->dentry_leases);
147537c4efc1SYan, Zheng spin_unlock(&mdsc->dentry_list_lock);
147637c4efc1SYan, Zheng }
147737c4efc1SYan, Zheng
__dentry_dir_lease_touch(struct ceph_mds_client * mdsc,struct ceph_dentry_info * di)147837c4efc1SYan, Zheng static void __dentry_dir_lease_touch(struct ceph_mds_client* mdsc,
147937c4efc1SYan, Zheng struct ceph_dentry_info *di)
148037c4efc1SYan, Zheng {
148137c4efc1SYan, Zheng di->flags &= ~(CEPH_DENTRY_LEASE_LIST | CEPH_DENTRY_REFERENCED);
148237c4efc1SYan, Zheng di->lease_gen = 0;
148337c4efc1SYan, Zheng di->time = jiffies;
148437c4efc1SYan, Zheng list_move_tail(&di->lease_list, &mdsc->dentry_dir_leases);
148537c4efc1SYan, Zheng }
148637c4efc1SYan, Zheng
148737c4efc1SYan, Zheng /*
148837c4efc1SYan, Zheng * When dir lease is used, add dentry to tail of mdsc->dentry_dir_leases
148937c4efc1SYan, Zheng * list if it's not in the list, otherwise set 'referenced' flag.
149037c4efc1SYan, Zheng *
149137c4efc1SYan, Zheng * Called under dentry->d_lock.
149237c4efc1SYan, Zheng */
__ceph_dentry_dir_lease_touch(struct ceph_dentry_info * di)149337c4efc1SYan, Zheng void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di)
149437c4efc1SYan, Zheng {
149537c4efc1SYan, Zheng struct dentry *dn = di->dentry;
149637c4efc1SYan, Zheng struct ceph_mds_client *mdsc;
149737c4efc1SYan, Zheng
14980eb30853SXiubo Li dout("dentry_dir_lease_touch %p %p '%pd' (offset 0x%llx)\n",
149937c4efc1SYan, Zheng di, dn, dn, di->offset);
150037c4efc1SYan, Zheng
150137c4efc1SYan, Zheng if (!list_empty(&di->lease_list)) {
150237c4efc1SYan, Zheng if (di->flags & CEPH_DENTRY_LEASE_LIST) {
150337c4efc1SYan, Zheng /* don't remove dentry from dentry lease list
150437c4efc1SYan, Zheng * if its lease is valid */
150537c4efc1SYan, Zheng if (__dentry_lease_is_valid(di))
150637c4efc1SYan, Zheng return;
150737c4efc1SYan, Zheng } else {
150837c4efc1SYan, Zheng di->flags |= CEPH_DENTRY_REFERENCED;
150937c4efc1SYan, Zheng return;
151037c4efc1SYan, Zheng }
151137c4efc1SYan, Zheng }
151237c4efc1SYan, Zheng
151337c4efc1SYan, Zheng if (di->flags & CEPH_DENTRY_SHRINK_LIST) {
151437c4efc1SYan, Zheng di->flags |= CEPH_DENTRY_REFERENCED;
151537c4efc1SYan, Zheng di->flags &= ~CEPH_DENTRY_LEASE_LIST;
151637c4efc1SYan, Zheng return;
151737c4efc1SYan, Zheng }
151837c4efc1SYan, Zheng
1519*985b9ee8SXiubo Li mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc;
152037c4efc1SYan, Zheng spin_lock(&mdsc->dentry_list_lock);
152137c4efc1SYan, Zheng __dentry_dir_lease_touch(mdsc, di),
152237c4efc1SYan, Zheng spin_unlock(&mdsc->dentry_list_lock);
152337c4efc1SYan, Zheng }
152437c4efc1SYan, Zheng
__dentry_lease_unlist(struct ceph_dentry_info * di)152537c4efc1SYan, Zheng static void __dentry_lease_unlist(struct ceph_dentry_info *di)
152637c4efc1SYan, Zheng {
152737c4efc1SYan, Zheng struct ceph_mds_client *mdsc;
152837c4efc1SYan, Zheng if (di->flags & CEPH_DENTRY_SHRINK_LIST)
152937c4efc1SYan, Zheng return;
153037c4efc1SYan, Zheng if (list_empty(&di->lease_list))
153137c4efc1SYan, Zheng return;
153237c4efc1SYan, Zheng
1533*985b9ee8SXiubo Li mdsc = ceph_sb_to_fs_client(di->dentry->d_sb)->mdsc;
153437c4efc1SYan, Zheng spin_lock(&mdsc->dentry_list_lock);
153537c4efc1SYan, Zheng list_del_init(&di->lease_list);
153637c4efc1SYan, Zheng spin_unlock(&mdsc->dentry_list_lock);
153737c4efc1SYan, Zheng }
153837c4efc1SYan, Zheng
/*
 * Verdicts returned by the check callbacks passed to
 * __dentry_leases_walk().  The non-zero values are distinct bits and are
 * tested there with '&'.
 */
enum {
	KEEP   = 0,		/* leave the entry where it is */
	DELETE = 1 << 0,	/* stale: unlist it or queue it for disposal */
	TOUCH  = 1 << 1,	/* move it to the tail of the dir lease list */
	STOP   = 1 << 2,	/* end the walk after this entry */
};
154537c4efc1SYan, Zheng
154637c4efc1SYan, Zheng struct ceph_lease_walk_control {
154737c4efc1SYan, Zheng bool dir_lease;
1548fe33032dSYan, Zheng bool expire_dir_lease;
154937c4efc1SYan, Zheng unsigned long nr_to_scan;
155037c4efc1SYan, Zheng unsigned long dir_lease_ttl;
155137c4efc1SYan, Zheng };
155237c4efc1SYan, Zheng
155337c4efc1SYan, Zheng static unsigned long
__dentry_leases_walk(struct ceph_mds_client * mdsc,struct ceph_lease_walk_control * lwc,int (* check)(struct dentry *,void *))155437c4efc1SYan, Zheng __dentry_leases_walk(struct ceph_mds_client *mdsc,
155537c4efc1SYan, Zheng struct ceph_lease_walk_control *lwc,
155637c4efc1SYan, Zheng int (*check)(struct dentry*, void*))
155737c4efc1SYan, Zheng {
155837c4efc1SYan, Zheng struct ceph_dentry_info *di, *tmp;
155937c4efc1SYan, Zheng struct dentry *dentry, *last = NULL;
156037c4efc1SYan, Zheng struct list_head* list;
156137c4efc1SYan, Zheng LIST_HEAD(dispose);
156237c4efc1SYan, Zheng unsigned long freed = 0;
156337c4efc1SYan, Zheng int ret = 0;
156437c4efc1SYan, Zheng
156537c4efc1SYan, Zheng list = lwc->dir_lease ? &mdsc->dentry_dir_leases : &mdsc->dentry_leases;
156637c4efc1SYan, Zheng spin_lock(&mdsc->dentry_list_lock);
156737c4efc1SYan, Zheng list_for_each_entry_safe(di, tmp, list, lease_list) {
156837c4efc1SYan, Zheng if (!lwc->nr_to_scan)
156937c4efc1SYan, Zheng break;
157037c4efc1SYan, Zheng --lwc->nr_to_scan;
157137c4efc1SYan, Zheng
157237c4efc1SYan, Zheng dentry = di->dentry;
157337c4efc1SYan, Zheng if (last == dentry)
157437c4efc1SYan, Zheng break;
157537c4efc1SYan, Zheng
157637c4efc1SYan, Zheng if (!spin_trylock(&dentry->d_lock))
157737c4efc1SYan, Zheng continue;
157837c4efc1SYan, Zheng
1579516162b9SAl Viro if (__lockref_is_dead(&dentry->d_lockref)) {
158037c4efc1SYan, Zheng list_del_init(&di->lease_list);
158137c4efc1SYan, Zheng goto next;
158237c4efc1SYan, Zheng }
158337c4efc1SYan, Zheng
158437c4efc1SYan, Zheng ret = check(dentry, lwc);
158537c4efc1SYan, Zheng if (ret & TOUCH) {
158637c4efc1SYan, Zheng /* move it into tail of dir lease list */
158737c4efc1SYan, Zheng __dentry_dir_lease_touch(mdsc, di);
158837c4efc1SYan, Zheng if (!last)
158937c4efc1SYan, Zheng last = dentry;
159037c4efc1SYan, Zheng }
159137c4efc1SYan, Zheng if (ret & DELETE) {
159237c4efc1SYan, Zheng /* stale lease */
159337c4efc1SYan, Zheng di->flags &= ~CEPH_DENTRY_REFERENCED;
159437c4efc1SYan, Zheng if (dentry->d_lockref.count > 0) {
159537c4efc1SYan, Zheng /* update_dentry_lease() will re-add
159637c4efc1SYan, Zheng * it to lease list, or
159737c4efc1SYan, Zheng * ceph_d_delete() will return 1 when
159837c4efc1SYan, Zheng * last reference is dropped */
159937c4efc1SYan, Zheng list_del_init(&di->lease_list);
160037c4efc1SYan, Zheng } else {
160137c4efc1SYan, Zheng di->flags |= CEPH_DENTRY_SHRINK_LIST;
160237c4efc1SYan, Zheng list_move_tail(&di->lease_list, &dispose);
160337c4efc1SYan, Zheng dget_dlock(dentry);
160437c4efc1SYan, Zheng }
160537c4efc1SYan, Zheng }
160637c4efc1SYan, Zheng next:
160737c4efc1SYan, Zheng spin_unlock(&dentry->d_lock);
160837c4efc1SYan, Zheng if (ret & STOP)
160937c4efc1SYan, Zheng break;
161037c4efc1SYan, Zheng }
161137c4efc1SYan, Zheng spin_unlock(&mdsc->dentry_list_lock);
161237c4efc1SYan, Zheng
161337c4efc1SYan, Zheng while (!list_empty(&dispose)) {
161437c4efc1SYan, Zheng di = list_first_entry(&dispose, struct ceph_dentry_info,
161537c4efc1SYan, Zheng lease_list);
161637c4efc1SYan, Zheng dentry = di->dentry;
161737c4efc1SYan, Zheng spin_lock(&dentry->d_lock);
161837c4efc1SYan, Zheng
161937c4efc1SYan, Zheng list_del_init(&di->lease_list);
162037c4efc1SYan, Zheng di->flags &= ~CEPH_DENTRY_SHRINK_LIST;
162137c4efc1SYan, Zheng if (di->flags & CEPH_DENTRY_REFERENCED) {
162237c4efc1SYan, Zheng spin_lock(&mdsc->dentry_list_lock);
162337c4efc1SYan, Zheng if (di->flags & CEPH_DENTRY_LEASE_LIST) {
162437c4efc1SYan, Zheng list_add_tail(&di->lease_list,
162537c4efc1SYan, Zheng &mdsc->dentry_leases);
162637c4efc1SYan, Zheng } else {
162737c4efc1SYan, Zheng __dentry_dir_lease_touch(mdsc, di);
162837c4efc1SYan, Zheng }
162937c4efc1SYan, Zheng spin_unlock(&mdsc->dentry_list_lock);
163037c4efc1SYan, Zheng } else {
163137c4efc1SYan, Zheng freed++;
163237c4efc1SYan, Zheng }
163337c4efc1SYan, Zheng
163437c4efc1SYan, Zheng spin_unlock(&dentry->d_lock);
163537c4efc1SYan, Zheng /* ceph_d_delete() does the trick */
163637c4efc1SYan, Zheng dput(dentry);
163737c4efc1SYan, Zheng }
163837c4efc1SYan, Zheng return freed;
163937c4efc1SYan, Zheng }
164037c4efc1SYan, Zheng
__dentry_lease_check(struct dentry * dentry,void * arg)164137c4efc1SYan, Zheng static int __dentry_lease_check(struct dentry *dentry, void *arg)
164237c4efc1SYan, Zheng {
164337c4efc1SYan, Zheng struct ceph_dentry_info *di = ceph_dentry(dentry);
164437c4efc1SYan, Zheng int ret;
164537c4efc1SYan, Zheng
164637c4efc1SYan, Zheng if (__dentry_lease_is_valid(di))
164737c4efc1SYan, Zheng return STOP;
164837c4efc1SYan, Zheng ret = __dir_lease_try_check(dentry);
164937c4efc1SYan, Zheng if (ret == -EBUSY)
165037c4efc1SYan, Zheng return KEEP;
165137c4efc1SYan, Zheng if (ret > 0)
165237c4efc1SYan, Zheng return TOUCH;
165337c4efc1SYan, Zheng return DELETE;
165437c4efc1SYan, Zheng }
165537c4efc1SYan, Zheng
__dir_lease_check(struct dentry * dentry,void * arg)165637c4efc1SYan, Zheng static int __dir_lease_check(struct dentry *dentry, void *arg)
165737c4efc1SYan, Zheng {
165837c4efc1SYan, Zheng struct ceph_lease_walk_control *lwc = arg;
165937c4efc1SYan, Zheng struct ceph_dentry_info *di = ceph_dentry(dentry);
166037c4efc1SYan, Zheng
166137c4efc1SYan, Zheng int ret = __dir_lease_try_check(dentry);
166237c4efc1SYan, Zheng if (ret == -EBUSY)
166337c4efc1SYan, Zheng return KEEP;
166437c4efc1SYan, Zheng if (ret > 0) {
166537c4efc1SYan, Zheng if (time_before(jiffies, di->time + lwc->dir_lease_ttl))
166637c4efc1SYan, Zheng return STOP;
166737c4efc1SYan, Zheng /* Move dentry to tail of dir lease list if we don't want
166837c4efc1SYan, Zheng * to delete it. So dentries in the list are checked in a
166937c4efc1SYan, Zheng * round robin manner */
1670fe33032dSYan, Zheng if (!lwc->expire_dir_lease)
167137c4efc1SYan, Zheng return TOUCH;
1672fe33032dSYan, Zheng if (dentry->d_lockref.count > 0 ||
1673fe33032dSYan, Zheng (di->flags & CEPH_DENTRY_REFERENCED))
1674fe33032dSYan, Zheng return TOUCH;
1675fe33032dSYan, Zheng /* invalidate dir lease */
1676fe33032dSYan, Zheng di->lease_shared_gen = 0;
167737c4efc1SYan, Zheng }
167837c4efc1SYan, Zheng return DELETE;
167937c4efc1SYan, Zheng }
168037c4efc1SYan, Zheng
ceph_trim_dentries(struct ceph_mds_client * mdsc)168137c4efc1SYan, Zheng int ceph_trim_dentries(struct ceph_mds_client *mdsc)
168237c4efc1SYan, Zheng {
168337c4efc1SYan, Zheng struct ceph_lease_walk_control lwc;
1684fe33032dSYan, Zheng unsigned long count;
168537c4efc1SYan, Zheng unsigned long freed;
168637c4efc1SYan, Zheng
1687fe33032dSYan, Zheng spin_lock(&mdsc->caps_list_lock);
1688fe33032dSYan, Zheng if (mdsc->caps_use_max > 0 &&
1689fe33032dSYan, Zheng mdsc->caps_use_count > mdsc->caps_use_max)
1690fe33032dSYan, Zheng count = mdsc->caps_use_count - mdsc->caps_use_max;
1691fe33032dSYan, Zheng else
1692fe33032dSYan, Zheng count = 0;
1693fe33032dSYan, Zheng spin_unlock(&mdsc->caps_list_lock);
1694fe33032dSYan, Zheng
169537c4efc1SYan, Zheng lwc.dir_lease = false;
169637c4efc1SYan, Zheng lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE * 2;
169737c4efc1SYan, Zheng freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check);
169837c4efc1SYan, Zheng if (!lwc.nr_to_scan) /* more invalid leases */
169937c4efc1SYan, Zheng return -EAGAIN;
170037c4efc1SYan, Zheng
170137c4efc1SYan, Zheng if (lwc.nr_to_scan < CEPH_CAPS_PER_RELEASE)
170237c4efc1SYan, Zheng lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE;
170337c4efc1SYan, Zheng
170437c4efc1SYan, Zheng lwc.dir_lease = true;
1705fe33032dSYan, Zheng lwc.expire_dir_lease = freed < count;
1706fe33032dSYan, Zheng lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ;
170737c4efc1SYan, Zheng freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check);
170837c4efc1SYan, Zheng if (!lwc.nr_to_scan) /* more to check */
170937c4efc1SYan, Zheng return -EAGAIN;
171037c4efc1SYan, Zheng
171137c4efc1SYan, Zheng return freed > 0 ? 1 : 0;
171237c4efc1SYan, Zheng }
171337c4efc1SYan, Zheng
171437c4efc1SYan, Zheng /*
171581a6cf2dSSage Weil * Ensure a dentry lease will no longer revalidate.
171681a6cf2dSSage Weil */
ceph_invalidate_dentry_lease(struct dentry * dentry)171781a6cf2dSSage Weil void ceph_invalidate_dentry_lease(struct dentry *dentry)
171881a6cf2dSSage Weil {
171937c4efc1SYan, Zheng struct ceph_dentry_info *di = ceph_dentry(dentry);
172081a6cf2dSSage Weil spin_lock(&dentry->d_lock);
172137c4efc1SYan, Zheng di->time = jiffies;
172237c4efc1SYan, Zheng di->lease_shared_gen = 0;
1723f5e17aedSJeff Layton di->flags &= ~CEPH_DENTRY_PRIMARY_LINK;
172437c4efc1SYan, Zheng __dentry_lease_unlist(di);
172581a6cf2dSSage Weil spin_unlock(&dentry->d_lock);
172681a6cf2dSSage Weil }
17272817b000SSage Weil
17282817b000SSage Weil /*
17292817b000SSage Weil * Check if dentry lease is valid. If not, delete the lease. Try to
17302817b000SSage Weil * renew if the least is more than half up.
17312817b000SSage Weil */
__dentry_lease_is_valid(struct ceph_dentry_info * di)17321e9c2eb6SYan, Zheng static bool __dentry_lease_is_valid(struct ceph_dentry_info *di)
17331e9c2eb6SYan, Zheng {
17341e9c2eb6SYan, Zheng struct ceph_mds_session *session;
17351e9c2eb6SYan, Zheng
17361e9c2eb6SYan, Zheng if (!di->lease_gen)
17371e9c2eb6SYan, Zheng return false;
17381e9c2eb6SYan, Zheng
17391e9c2eb6SYan, Zheng session = di->lease_session;
17401e9c2eb6SYan, Zheng if (session) {
17411e9c2eb6SYan, Zheng u32 gen;
17421e9c2eb6SYan, Zheng unsigned long ttl;
17431e9c2eb6SYan, Zheng
174452d60f8eSJeff Layton gen = atomic_read(&session->s_cap_gen);
17451e9c2eb6SYan, Zheng ttl = session->s_cap_ttl;
17461e9c2eb6SYan, Zheng
17471e9c2eb6SYan, Zheng if (di->lease_gen == gen &&
17481e9c2eb6SYan, Zheng time_before(jiffies, ttl) &&
17491e9c2eb6SYan, Zheng time_before(jiffies, di->time))
17501e9c2eb6SYan, Zheng return true;
17511e9c2eb6SYan, Zheng }
17521e9c2eb6SYan, Zheng di->lease_gen = 0;
17531e9c2eb6SYan, Zheng return false;
17541e9c2eb6SYan, Zheng }
17551e9c2eb6SYan, Zheng
dentry_lease_is_valid(struct dentry * dentry,unsigned int flags)17568f2a98efSYan, Zheng static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags)
17572817b000SSage Weil {
17582817b000SSage Weil struct ceph_dentry_info *di;
17592817b000SSage Weil struct ceph_mds_session *session = NULL;
17602817b000SSage Weil u32 seq = 0;
17611e9c2eb6SYan, Zheng int valid = 0;
17622817b000SSage Weil
17632817b000SSage Weil spin_lock(&dentry->d_lock);
17642817b000SSage Weil di = ceph_dentry(dentry);
17651e9c2eb6SYan, Zheng if (di && __dentry_lease_is_valid(di)) {
17662817b000SSage Weil valid = 1;
17671e9c2eb6SYan, Zheng
17682817b000SSage Weil if (di->lease_renew_after &&
17692817b000SSage Weil time_after(jiffies, di->lease_renew_after)) {
177014fb9c9eSJeff Layton /*
177114fb9c9eSJeff Layton * We should renew. If we're in RCU walk mode
177214fb9c9eSJeff Layton * though, we can't do that so just return
177314fb9c9eSJeff Layton * -ECHILD.
177414fb9c9eSJeff Layton */
177514fb9c9eSJeff Layton if (flags & LOOKUP_RCU) {
177614fb9c9eSJeff Layton valid = -ECHILD;
177714fb9c9eSJeff Layton } else {
17781e9c2eb6SYan, Zheng session = ceph_get_mds_session(di->lease_session);
17792817b000SSage Weil seq = di->lease_seq;
17802817b000SSage Weil di->lease_renew_after = 0;
17812817b000SSage Weil di->lease_renew_from = jiffies;
17822817b000SSage Weil }
17832817b000SSage Weil }
17842817b000SSage Weil }
17852817b000SSage Weil spin_unlock(&dentry->d_lock);
17862817b000SSage Weil
17872817b000SSage Weil if (session) {
17888f2a98efSYan, Zheng ceph_mdsc_lease_send_msg(session, dentry,
17892817b000SSage Weil CEPH_MDS_LEASE_RENEW, seq);
17902817b000SSage Weil ceph_put_mds_session(session);
17912817b000SSage Weil }
17922817b000SSage Weil dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid);
17932817b000SSage Weil return valid;
17942817b000SSage Weil }
17952817b000SSage Weil
17962817b000SSage Weil /*
17971e9c2eb6SYan, Zheng * Called under dentry->d_lock.
17981e9c2eb6SYan, Zheng */
__dir_lease_try_check(const struct dentry * dentry)17991e9c2eb6SYan, Zheng static int __dir_lease_try_check(const struct dentry *dentry)
18001e9c2eb6SYan, Zheng {
18011e9c2eb6SYan, Zheng struct ceph_dentry_info *di = ceph_dentry(dentry);
18021e9c2eb6SYan, Zheng struct inode *dir;
18031e9c2eb6SYan, Zheng struct ceph_inode_info *ci;
18041e9c2eb6SYan, Zheng int valid = 0;
18051e9c2eb6SYan, Zheng
18061e9c2eb6SYan, Zheng if (!di->lease_shared_gen)
18071e9c2eb6SYan, Zheng return 0;
18081e9c2eb6SYan, Zheng if (IS_ROOT(dentry))
18091e9c2eb6SYan, Zheng return 0;
18101e9c2eb6SYan, Zheng
18111e9c2eb6SYan, Zheng dir = d_inode(dentry->d_parent);
18121e9c2eb6SYan, Zheng ci = ceph_inode(dir);
18131e9c2eb6SYan, Zheng
18141e9c2eb6SYan, Zheng if (spin_trylock(&ci->i_ceph_lock)) {
18151e9c2eb6SYan, Zheng if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen &&
18161e9c2eb6SYan, Zheng __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 0))
18171e9c2eb6SYan, Zheng valid = 1;
18181e9c2eb6SYan, Zheng spin_unlock(&ci->i_ceph_lock);
18191e9c2eb6SYan, Zheng } else {
18201e9c2eb6SYan, Zheng valid = -EBUSY;
18211e9c2eb6SYan, Zheng }
18221e9c2eb6SYan, Zheng
18231e9c2eb6SYan, Zheng if (!valid)
18241e9c2eb6SYan, Zheng di->lease_shared_gen = 0;
18251e9c2eb6SYan, Zheng return valid;
18261e9c2eb6SYan, Zheng }
18271e9c2eb6SYan, Zheng
18281e9c2eb6SYan, Zheng /*
18292817b000SSage Weil * Check if directory-wide content lease/cap is valid.
18302817b000SSage Weil */
dir_lease_is_valid(struct inode * dir,struct dentry * dentry,struct ceph_mds_client * mdsc)1831719a2514SYan, Zheng static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry,
1832719a2514SYan, Zheng struct ceph_mds_client *mdsc)
18332817b000SSage Weil {
18342817b000SSage Weil struct ceph_inode_info *ci = ceph_inode(dir);
1835feab6ac2SYan, Zheng int valid;
1836feab6ac2SYan, Zheng int shared_gen;
18372817b000SSage Weil
1838be655596SSage Weil spin_lock(&ci->i_ceph_lock);
18392817b000SSage Weil valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
1840719a2514SYan, Zheng if (valid) {
1841719a2514SYan, Zheng __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD);
1842feab6ac2SYan, Zheng shared_gen = atomic_read(&ci->i_shared_gen);
1843719a2514SYan, Zheng }
1844be655596SSage Weil spin_unlock(&ci->i_ceph_lock);
1845feab6ac2SYan, Zheng if (valid) {
1846feab6ac2SYan, Zheng struct ceph_dentry_info *di;
1847feab6ac2SYan, Zheng spin_lock(&dentry->d_lock);
1848feab6ac2SYan, Zheng di = ceph_dentry(dentry);
1849feab6ac2SYan, Zheng if (dir == d_inode(dentry->d_parent) &&
1850feab6ac2SYan, Zheng di && di->lease_shared_gen == shared_gen)
185137c4efc1SYan, Zheng __ceph_dentry_dir_lease_touch(di);
1852feab6ac2SYan, Zheng else
1853feab6ac2SYan, Zheng valid = 0;
1854feab6ac2SYan, Zheng spin_unlock(&dentry->d_lock);
1855feab6ac2SYan, Zheng }
1856feab6ac2SYan, Zheng dout("dir_lease_is_valid dir %p v%u dentry %p = %d\n",
1857feab6ac2SYan, Zheng dir, (unsigned)atomic_read(&ci->i_shared_gen), dentry, valid);
18582817b000SSage Weil return valid;
18592817b000SSage Weil }
18602817b000SSage Weil
18612817b000SSage Weil /*
18622817b000SSage Weil * Check if cached dentry can be trusted.
18632817b000SSage Weil */
ceph_d_revalidate(struct dentry * dentry,unsigned int flags)18640b728e19SAl Viro static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
18652817b000SSage Weil {
1866bf1c6acaSSage Weil int valid = 0;
1867641235d8SYan, Zheng struct dentry *parent;
1868aa8dd816SAl Viro struct inode *dir, *inode;
1869719a2514SYan, Zheng struct ceph_mds_client *mdsc;
187034286d66SNick Piggin
1871c5267601SJeff Layton valid = fscrypt_d_revalidate(dentry, flags);
1872c5267601SJeff Layton if (valid <= 0)
1873c5267601SJeff Layton return valid;
1874c5267601SJeff Layton
1875f49d1e05SJeff Layton if (flags & LOOKUP_RCU) {
187652953d55SSeraphime Kirkovski parent = READ_ONCE(dentry->d_parent);
1877f49d1e05SJeff Layton dir = d_inode_rcu(parent);
1878f49d1e05SJeff Layton if (!dir)
187934286d66SNick Piggin return -ECHILD;
1880aa8dd816SAl Viro inode = d_inode_rcu(dentry);
1881f49d1e05SJeff Layton } else {
1882f49d1e05SJeff Layton parent = dget_parent(dentry);
1883f49d1e05SJeff Layton dir = d_inode(parent);
1884aa8dd816SAl Viro inode = d_inode(dentry);
1885f49d1e05SJeff Layton }
188634286d66SNick Piggin
1887c5267601SJeff Layton dout("d_revalidate %p '%pd' inode %p offset 0x%llx nokey %d\n", dentry,
1888c5267601SJeff Layton dentry, inode, ceph_dentry(dentry)->offset,
1889c5267601SJeff Layton !!(dentry->d_flags & DCACHE_NOKEY_NAME));
18902817b000SSage Weil
1891*985b9ee8SXiubo Li mdsc = ceph_sb_to_fs_client(dir->i_sb)->mdsc;
1892719a2514SYan, Zheng
18932817b000SSage Weil /* always trust cached snapped dentries, snapdir dentry */
18942817b000SSage Weil if (ceph_snap(dir) != CEPH_NOSNAP) {
1895a455589fSAl Viro dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry,
1896aa8dd816SAl Viro dentry, inode);
1897bf1c6acaSSage Weil valid = 1;
1898aa8dd816SAl Viro } else if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
1899bf1c6acaSSage Weil valid = 1;
190014fb9c9eSJeff Layton } else {
19018f2a98efSYan, Zheng valid = dentry_lease_is_valid(dentry, flags);
190214fb9c9eSJeff Layton if (valid == -ECHILD)
190314fb9c9eSJeff Layton return valid;
1904719a2514SYan, Zheng if (valid || dir_lease_is_valid(dir, dentry, mdsc)) {
1905aa8dd816SAl Viro if (inode)
1906aa8dd816SAl Viro valid = ceph_is_any_caps(inode);
19079215aeeaSYan, Zheng else
1908bf1c6acaSSage Weil valid = 1;
19092817b000SSage Weil }
191014fb9c9eSJeff Layton }
19112817b000SSage Weil
1912200fd27cSYan, Zheng if (!valid) {
1913200fd27cSYan, Zheng struct ceph_mds_request *req;
19141097680dSJeff Layton int op, err;
19151097680dSJeff Layton u32 mask;
1916200fd27cSYan, Zheng
1917f49d1e05SJeff Layton if (flags & LOOKUP_RCU)
1918f49d1e05SJeff Layton return -ECHILD;
1919f49d1e05SJeff Layton
1920f9009efaSXiubo Li percpu_counter_inc(&mdsc->metric.d_lease_mis);
1921f9009efaSXiubo Li
1922200fd27cSYan, Zheng op = ceph_snap(dir) == CEPH_SNAPDIR ?
19235eb9f604SJeff Layton CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
1924200fd27cSYan, Zheng req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
1925200fd27cSYan, Zheng if (!IS_ERR(req)) {
1926200fd27cSYan, Zheng req->r_dentry = dget(dentry);
19275eb9f604SJeff Layton req->r_num_caps = 2;
19285eb9f604SJeff Layton req->r_parent = dir;
19294c183472SJeff Layton ihold(dir);
1930200fd27cSYan, Zheng
1931200fd27cSYan, Zheng mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
1932200fd27cSYan, Zheng if (ceph_security_xattr_wanted(dir))
1933200fd27cSYan, Zheng mask |= CEPH_CAP_XATTR_SHARED;
19341097680dSJeff Layton req->r_args.getattr.mask = cpu_to_le32(mask);
1935200fd27cSYan, Zheng
1936200fd27cSYan, Zheng err = ceph_mdsc_do_request(mdsc, NULL, req);
1937c3f4688aSJeff Layton switch (err) {
1938c3f4688aSJeff Layton case 0:
1939c3f4688aSJeff Layton if (d_really_is_positive(dentry) &&
1940c3f4688aSJeff Layton d_inode(dentry) == req->r_target_inode)
1941c3f4688aSJeff Layton valid = 1;
1942c3f4688aSJeff Layton break;
1943c3f4688aSJeff Layton case -ENOENT:
1944c3f4688aSJeff Layton if (d_really_is_negative(dentry))
1945c3f4688aSJeff Layton valid = 1;
1946df561f66SGustavo A. R. Silva fallthrough;
1947c3f4688aSJeff Layton default:
1948c3f4688aSJeff Layton break;
1949200fd27cSYan, Zheng }
1950200fd27cSYan, Zheng ceph_mdsc_put_request(req);
1951200fd27cSYan, Zheng dout("d_revalidate %p lookup result=%d\n",
1952200fd27cSYan, Zheng dentry, err);
1953200fd27cSYan, Zheng }
1954f9009efaSXiubo Li } else {
1955f9009efaSXiubo Li percpu_counter_inc(&mdsc->metric.d_lease_hit);
1956200fd27cSYan, Zheng }
1957200fd27cSYan, Zheng
1958bf1c6acaSSage Weil dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
195937c4efc1SYan, Zheng if (!valid)
19609215aeeaSYan, Zheng ceph_dir_clear_complete(dir);
1961641235d8SYan, Zheng
1962f49d1e05SJeff Layton if (!(flags & LOOKUP_RCU))
1963641235d8SYan, Zheng dput(parent);
1964bf1c6acaSSage Weil return valid;
19652817b000SSage Weil }
19662817b000SSage Weil
19672817b000SSage Weil /*
19681e9c2eb6SYan, Zheng * Delete unused dentry that doesn't have valid lease
19691e9c2eb6SYan, Zheng *
19701e9c2eb6SYan, Zheng * Called under dentry->d_lock.
19711e9c2eb6SYan, Zheng */
ceph_d_delete(const struct dentry * dentry)19721e9c2eb6SYan, Zheng static int ceph_d_delete(const struct dentry *dentry)
19731e9c2eb6SYan, Zheng {
19741e9c2eb6SYan, Zheng struct ceph_dentry_info *di;
19751e9c2eb6SYan, Zheng
19761e9c2eb6SYan, Zheng /* won't release caps */
19771e9c2eb6SYan, Zheng if (d_really_is_negative(dentry))
19781e9c2eb6SYan, Zheng return 0;
19791e9c2eb6SYan, Zheng if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
19801e9c2eb6SYan, Zheng return 0;
19811e9c2eb6SYan, Zheng /* vaild lease? */
19821e9c2eb6SYan, Zheng di = ceph_dentry(dentry);
19831e9c2eb6SYan, Zheng if (di) {
19841e9c2eb6SYan, Zheng if (__dentry_lease_is_valid(di))
19851e9c2eb6SYan, Zheng return 0;
19861e9c2eb6SYan, Zheng if (__dir_lease_try_check(dentry))
19871e9c2eb6SYan, Zheng return 0;
19881e9c2eb6SYan, Zheng }
19891e9c2eb6SYan, Zheng return 1;
19901e9c2eb6SYan, Zheng }
19911e9c2eb6SYan, Zheng
19921e9c2eb6SYan, Zheng /*
1993147851d2SSage Weil * Release our ceph_dentry_info.
19942817b000SSage Weil */
ceph_d_release(struct dentry * dentry)1995147851d2SSage Weil static void ceph_d_release(struct dentry *dentry)
19962817b000SSage Weil {
19972817b000SSage Weil struct ceph_dentry_info *di = ceph_dentry(dentry);
1998*985b9ee8SXiubo Li struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb);
19992817b000SSage Weil
2000147851d2SSage Weil dout("d_release %p\n", dentry);
20015b484a51SJeff Layton
2002f9009efaSXiubo Li atomic64_dec(&fsc->mdsc->metric.total_dentries);
2003f9009efaSXiubo Li
20045b484a51SJeff Layton spin_lock(&dentry->d_lock);
200537c4efc1SYan, Zheng __dentry_lease_unlist(di);
20065b484a51SJeff Layton dentry->d_fsdata = NULL;
20075b484a51SJeff Layton spin_unlock(&dentry->d_lock);
20085b484a51SJeff Layton
20092817b000SSage Weil ceph_put_mds_session(di->lease_session);
20102817b000SSage Weil kmem_cache_free(ceph_dentry_cachep, di);
20112817b000SSage Weil }
20122817b000SSage Weil
2013b58dc410SSage Weil /*
2014b58dc410SSage Weil * When the VFS prunes a dentry from the cache, we need to clear the
2015b58dc410SSage Weil * complete flag on the parent directory.
2016b58dc410SSage Weil *
2017b58dc410SSage Weil * Called under dentry->d_lock.
2018b58dc410SSage Weil */
ceph_d_prune(struct dentry * dentry)2019b58dc410SSage Weil static void ceph_d_prune(struct dentry *dentry)
2020b58dc410SSage Weil {
20215495c2d0SYan, Zheng struct ceph_inode_info *dir_ci;
20225495c2d0SYan, Zheng struct ceph_dentry_info *di;
20235495c2d0SYan, Zheng
20245495c2d0SYan, Zheng dout("ceph_d_prune %pd %p\n", dentry, dentry);
2025b58dc410SSage Weil
2026b58dc410SSage Weil /* do we have a valid parent? */
20278842b3beSSage Weil if (IS_ROOT(dentry))
2028b58dc410SSage Weil return;
2029b58dc410SSage Weil
20305495c2d0SYan, Zheng /* we hold d_lock, so d_parent is stable */
20315495c2d0SYan, Zheng dir_ci = ceph_inode(d_inode(dentry->d_parent));
20325495c2d0SYan, Zheng if (dir_ci->i_vino.snap == CEPH_SNAPDIR)
2033b58dc410SSage Weil return;
2034b58dc410SSage Weil
20355495c2d0SYan, Zheng /* who calls d_delete() should also disable dcache readdir */
20365495c2d0SYan, Zheng if (d_really_is_negative(dentry))
203718fc8abdSAl Viro return;
203818fc8abdSAl Viro
20395495c2d0SYan, Zheng /* d_fsdata does not get cleared until d_release */
20405495c2d0SYan, Zheng if (!d_unhashed(dentry)) {
20415495c2d0SYan, Zheng __ceph_dir_clear_complete(dir_ci);
20425495c2d0SYan, Zheng return;
20435495c2d0SYan, Zheng }
20445495c2d0SYan, Zheng
20455495c2d0SYan, Zheng /* Disable dcache readdir just in case that someone called d_drop()
20465495c2d0SYan, Zheng * or d_invalidate(), but MDS didn't revoke CEPH_CAP_FILE_SHARED
20475495c2d0SYan, Zheng * properly (dcache readdir is still enabled) */
20485495c2d0SYan, Zheng di = ceph_dentry(dentry);
20495495c2d0SYan, Zheng if (di->offset > 0 &&
20505495c2d0SYan, Zheng di->lease_shared_gen == atomic_read(&dir_ci->i_shared_gen))
20515495c2d0SYan, Zheng __ceph_dir_clear_ordered(dir_ci);
2052b58dc410SSage Weil }
20532817b000SSage Weil
20542817b000SSage Weil /*
20552817b000SSage Weil * read() on a dir. This weird interface hack only works if mounted
20562817b000SSage Weil * with '-o dirstat'.
20572817b000SSage Weil */
ceph_read_dir(struct file * file,char __user * buf,size_t size,loff_t * ppos)20582817b000SSage Weil static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
20592817b000SSage Weil loff_t *ppos)
20602817b000SSage Weil {
2061bb48bd4dSChengguang Xu struct ceph_dir_file_info *dfi = file->private_data;
2062496ad9aaSAl Viro struct inode *inode = file_inode(file);
20632817b000SSage Weil struct ceph_inode_info *ci = ceph_inode(inode);
20642817b000SSage Weil int left;
2065ae598083SSage Weil const int bufsize = 1024;
20662817b000SSage Weil
2067*985b9ee8SXiubo Li if (!ceph_test_mount_opt(ceph_sb_to_fs_client(inode->i_sb), DIRSTAT))
20682817b000SSage Weil return -EISDIR;
20692817b000SSage Weil
2070bb48bd4dSChengguang Xu if (!dfi->dir_info) {
2071bb48bd4dSChengguang Xu dfi->dir_info = kmalloc(bufsize, GFP_KERNEL);
2072bb48bd4dSChengguang Xu if (!dfi->dir_info)
20732817b000SSage Weil return -ENOMEM;
2074bb48bd4dSChengguang Xu dfi->dir_info_len =
2075bb48bd4dSChengguang Xu snprintf(dfi->dir_info, bufsize,
20762817b000SSage Weil "entries: %20lld\n"
20772817b000SSage Weil " files: %20lld\n"
20782817b000SSage Weil " subdirs: %20lld\n"
20792817b000SSage Weil "rentries: %20lld\n"
20802817b000SSage Weil " rfiles: %20lld\n"
20812817b000SSage Weil " rsubdirs: %20lld\n"
20822817b000SSage Weil "rbytes: %20lld\n"
20839bbeab41SArnd Bergmann "rctime: %10lld.%09ld\n",
20842817b000SSage Weil ci->i_files + ci->i_subdirs,
20852817b000SSage Weil ci->i_files,
20862817b000SSage Weil ci->i_subdirs,
20872817b000SSage Weil ci->i_rfiles + ci->i_rsubdirs,
20882817b000SSage Weil ci->i_rfiles,
20892817b000SSage Weil ci->i_rsubdirs,
20902817b000SSage Weil ci->i_rbytes,
20919bbeab41SArnd Bergmann ci->i_rctime.tv_sec,
20929bbeab41SArnd Bergmann ci->i_rctime.tv_nsec);
20932817b000SSage Weil }
20942817b000SSage Weil
2095bb48bd4dSChengguang Xu if (*ppos >= dfi->dir_info_len)
20962817b000SSage Weil return 0;
2097bb48bd4dSChengguang Xu size = min_t(unsigned, size, dfi->dir_info_len-*ppos);
2098bb48bd4dSChengguang Xu left = copy_to_user(buf, dfi->dir_info + *ppos, size);
20992817b000SSage Weil if (left == size)
21002817b000SSage Weil return -EFAULT;
21012817b000SSage Weil *ppos += (size - left);
21022817b000SSage Weil return size - left;
21032817b000SSage Weil }
21042817b000SSage Weil
21052817b000SSage Weil
21062817b000SSage Weil
21076c0f3af7SSage Weil /*
21086c0f3af7SSage Weil * Return name hash for a given dentry. This is dependent on
21096c0f3af7SSage Weil * the parent directory's hash function.
21106c0f3af7SSage Weil */
ceph_dentry_hash(struct inode * dir,struct dentry * dn)2111e5f86dc3SSage Weil unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
21126c0f3af7SSage Weil {
21136c0f3af7SSage Weil struct ceph_inode_info *dci = ceph_inode(dir);
211476a495d6SJeff Layton unsigned hash;
21156c0f3af7SSage Weil
21166c0f3af7SSage Weil switch (dci->i_dir_layout.dl_dir_hash) {
21176c0f3af7SSage Weil case 0: /* for backward compat */
21186c0f3af7SSage Weil case CEPH_STR_HASH_LINUX:
21196c0f3af7SSage Weil return dn->d_name.hash;
21206c0f3af7SSage Weil
21216c0f3af7SSage Weil default:
212276a495d6SJeff Layton spin_lock(&dn->d_lock);
212376a495d6SJeff Layton hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
21246c0f3af7SSage Weil dn->d_name.name, dn->d_name.len);
212576a495d6SJeff Layton spin_unlock(&dn->d_lock);
212676a495d6SJeff Layton return hash;
21276c0f3af7SSage Weil }
21286c0f3af7SSage Weil }
21296c0f3af7SSage Weil
21303e327154SLinus Torvalds WRAP_DIR_ITER(ceph_readdir) // FIXME!
21312817b000SSage Weil const struct file_operations ceph_dir_fops = {
21322817b000SSage Weil .read = ceph_read_dir,
21333e327154SLinus Torvalds .iterate_shared = shared_ceph_readdir,
21342817b000SSage Weil .llseek = ceph_dir_llseek,
21352817b000SSage Weil .open = ceph_open,
21362817b000SSage Weil .release = ceph_release,
21372817b000SSage Weil .unlocked_ioctl = ceph_ioctl,
213818bd6caaSArnd Bergmann .compat_ioctl = compat_ptr_ioctl,
2139da819c81SYan, Zheng .fsync = ceph_fsync,
2140597817ddSYan, Zheng .lock = ceph_lock,
2141597817ddSYan, Zheng .flock = ceph_flock,
21422817b000SSage Weil };
21432817b000SSage Weil
214438c48b5fSYan, Zheng const struct file_operations ceph_snapdir_fops = {
21453e327154SLinus Torvalds .iterate_shared = shared_ceph_readdir,
214638c48b5fSYan, Zheng .llseek = ceph_dir_llseek,
214738c48b5fSYan, Zheng .open = ceph_open,
214838c48b5fSYan, Zheng .release = ceph_release,
214938c48b5fSYan, Zheng };
215038c48b5fSYan, Zheng
21512817b000SSage Weil const struct inode_operations ceph_dir_iops = {
21522817b000SSage Weil .lookup = ceph_lookup,
21532817b000SSage Weil .permission = ceph_permission,
21542817b000SSage Weil .getattr = ceph_getattr,
21552817b000SSage Weil .setattr = ceph_setattr,
21562817b000SSage Weil .listxattr = ceph_listxattr,
2157cac2f8b8SChristian Brauner .get_inode_acl = ceph_get_acl,
215872466d0bSSage Weil .set_acl = ceph_set_acl,
21592817b000SSage Weil .mknod = ceph_mknod,
21602817b000SSage Weil .symlink = ceph_symlink,
21612817b000SSage Weil .mkdir = ceph_mkdir,
21622817b000SSage Weil .link = ceph_link,
21632817b000SSage Weil .unlink = ceph_unlink,
21642817b000SSage Weil .rmdir = ceph_unlink,
21652817b000SSage Weil .rename = ceph_rename,
21662817b000SSage Weil .create = ceph_create,
21672d83bde9SMiklos Szeredi .atomic_open = ceph_atomic_open,
21682817b000SSage Weil };
21692817b000SSage Weil
217038c48b5fSYan, Zheng const struct inode_operations ceph_snapdir_iops = {
217138c48b5fSYan, Zheng .lookup = ceph_lookup,
217238c48b5fSYan, Zheng .permission = ceph_permission,
217338c48b5fSYan, Zheng .getattr = ceph_getattr,
217438c48b5fSYan, Zheng .mkdir = ceph_mkdir,
217538c48b5fSYan, Zheng .rmdir = ceph_unlink,
21760ea611a3SYan, Zheng .rename = ceph_rename,
217738c48b5fSYan, Zheng };
217838c48b5fSYan, Zheng
217952dfb8acSSage Weil const struct dentry_operations ceph_dentry_ops = {
21802817b000SSage Weil .d_revalidate = ceph_d_revalidate,
21811e9c2eb6SYan, Zheng .d_delete = ceph_d_delete,
2182147851d2SSage Weil .d_release = ceph_d_release,
2183b58dc410SSage Weil .d_prune = ceph_d_prune,
2184ad5cb123SAl Viro .d_init = ceph_d_init,
21852817b000SSage Weil };
2186