xref: /openbmc/linux/fs/ceph/dir.c (revision c845428b7a9157523103100806bc8130d64769c8)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
23d14c5d2SYehuda Sadeh #include <linux/ceph/ceph_debug.h>
32817b000SSage Weil 
42817b000SSage Weil #include <linux/spinlock.h>
52817b000SSage Weil #include <linux/namei.h>
65a0e3ad6STejun Heo #include <linux/slab.h>
72817b000SSage Weil #include <linux/sched.h>
82cdeb1e4SAndreas Gruenbacher #include <linux/xattr.h>
92817b000SSage Weil 
102817b000SSage Weil #include "super.h"
113d14c5d2SYehuda Sadeh #include "mds_client.h"
12af9ffa6dSXiubo Li #include "crypto.h"
132817b000SSage Weil 
/*
 * Directory operations: readdir, lookup, create, link, unlink,
 * rename, etc.
 */

/*
 * Ceph MDS operations are specified in terms of a base ino and
 * relative path.  Thus, the client can specify an operation on a
 * specific inode (e.g., a getattr due to fstat(2)), or as a path
 * relative to, say, the root directory.
 *
 * Normally, we limit ourselves to strict inode ops (no path component)
 * or dentry operations (a single path component relative to an ino).  The
 * exception to this is open_root_dentry(), which will open the mount
 * point by name.
 */
302817b000SSage Weil 
3152dfb8acSSage Weil const struct dentry_operations ceph_dentry_ops;
322817b000SSage Weil 
3337c4efc1SYan, Zheng static bool __dentry_lease_is_valid(struct ceph_dentry_info *di);
3437c4efc1SYan, Zheng static int __dir_lease_try_check(const struct dentry *dentry);
3537c4efc1SYan, Zheng 
362817b000SSage Weil /*
372817b000SSage Weil  * Initialize ceph dentry state.
382817b000SSage Weil  */
ceph_d_init(struct dentry * dentry)39ad5cb123SAl Viro static int ceph_d_init(struct dentry *dentry)
402817b000SSage Weil {
412817b000SSage Weil 	struct ceph_dentry_info *di;
422678da88SXiubo Li 	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dentry->d_sb);
432817b000SSage Weil 
4499ec2697SGeliang Tang 	di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL);
452817b000SSage Weil 	if (!di)
462817b000SSage Weil 		return -ENOMEM;          /* oh well */
472817b000SSage Weil 
482817b000SSage Weil 	di->dentry = dentry;
492817b000SSage Weil 	di->lease_session = NULL;
509b16f03cSMiklos Szeredi 	di->time = jiffies;
5148d0cbd1SSage Weil 	dentry->d_fsdata = di;
5237c4efc1SYan, Zheng 	INIT_LIST_HEAD(&di->lease_list);
53f9009efaSXiubo Li 
54f9009efaSXiubo Li 	atomic64_inc(&mdsc->metric.total_dentries);
55f9009efaSXiubo Li 
562817b000SSage Weil 	return 0;
572817b000SSage Weil }
582817b000SSage Weil 
592817b000SSage Weil /*
60f3c4ebe6SYan, Zheng  * for f_pos for readdir:
61f3c4ebe6SYan, Zheng  * - hash order:
62f3c4ebe6SYan, Zheng  *	(0xff << 52) | ((24 bits hash) << 28) |
63f3c4ebe6SYan, Zheng  *	(the nth entry has hash collision);
64f3c4ebe6SYan, Zheng  * - frag+name order;
65f3c4ebe6SYan, Zheng  *	((frag value) << 28) | (the nth entry in frag);
662817b000SSage Weil  */
67f3c4ebe6SYan, Zheng #define OFFSET_BITS	28
68f3c4ebe6SYan, Zheng #define OFFSET_MASK	((1 << OFFSET_BITS) - 1)
69f3c4ebe6SYan, Zheng #define HASH_ORDER	(0xffull << (OFFSET_BITS + 24))
ceph_make_fpos(unsigned high,unsigned off,bool hash_order)70f3c4ebe6SYan, Zheng loff_t ceph_make_fpos(unsigned high, unsigned off, bool hash_order)
71f3c4ebe6SYan, Zheng {
72f3c4ebe6SYan, Zheng 	loff_t fpos = ((loff_t)high << 28) | (loff_t)off;
73f3c4ebe6SYan, Zheng 	if (hash_order)
74f3c4ebe6SYan, Zheng 		fpos |= HASH_ORDER;
75f3c4ebe6SYan, Zheng 	return fpos;
76f3c4ebe6SYan, Zheng }
77f3c4ebe6SYan, Zheng 
/* True when every bit of the HASH_ORDER marker is set in position @p. */
static bool is_hash_order(loff_t p)
{
	const unsigned long long marker = HASH_ORDER;

	return (p & marker) == marker;
}
82f3c4ebe6SYan, Zheng 
/* Extract the part of position @p that sits above the low OFFSET_BITS bits. */
static unsigned fpos_frag(loff_t p)
{
	loff_t upper = p >> OFFSET_BITS;

	/* implicit narrowing to unsigned, exactly as the return type demands */
	return upper;
}
87f3c4ebe6SYan, Zheng 
/* Hash value of position @p: ceph_frag_value() applied to its frag part. */
static unsigned fpos_hash(loff_t p)
{
	unsigned frag = fpos_frag(p);

	return ceph_frag_value(frag);
}
92f3c4ebe6SYan, Zheng 
/* Extract the low OFFSET_BITS bits (the in-frag offset) of position @p. */
static unsigned fpos_off(loff_t p)
{
	loff_t low = p & OFFSET_MASK;

	return low;
}
972817b000SSage Weil 
/*
 * Order two readdir positions: first by frag (ceph_frag_compare()),
 * and only when the frags compare equal, by the in-frag offset.
 * Returns <0, 0 or >0.
 */
static int fpos_cmp(loff_t l, loff_t r)
{
	int frag_order = ceph_frag_compare(fpos_frag(l), fpos_frag(r));

	if (frag_order == 0)
		return (int)(fpos_off(l) - fpos_off(r));
	return frag_order;
}
1054d5f5df6SYan, Zheng 
1062817b000SSage Weil /*
107fdd4e158SYan, Zheng  * make note of the last dentry we read, so we can
108fdd4e158SYan, Zheng  * continue at the same lexicographical point,
109fdd4e158SYan, Zheng  * regardless of what dir changes take place on the
110fdd4e158SYan, Zheng  * server.
111fdd4e158SYan, Zheng  */
note_last_dentry(struct ceph_dir_file_info * dfi,const char * name,int len,unsigned next_offset)112bb48bd4dSChengguang Xu static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name,
113fdd4e158SYan, Zheng 		            int len, unsigned next_offset)
114fdd4e158SYan, Zheng {
115fdd4e158SYan, Zheng 	char *buf = kmalloc(len+1, GFP_KERNEL);
116fdd4e158SYan, Zheng 	if (!buf)
117fdd4e158SYan, Zheng 		return -ENOMEM;
118bb48bd4dSChengguang Xu 	kfree(dfi->last_name);
119bb48bd4dSChengguang Xu 	dfi->last_name = buf;
120bb48bd4dSChengguang Xu 	memcpy(dfi->last_name, name, len);
121bb48bd4dSChengguang Xu 	dfi->last_name[len] = 0;
122bb48bd4dSChengguang Xu 	dfi->next_offset = next_offset;
123bb48bd4dSChengguang Xu 	dout("note_last_dentry '%s'\n", dfi->last_name);
124fdd4e158SYan, Zheng 	return 0;
125fdd4e158SYan, Zheng }
126fdd4e158SYan, Zheng 
127c530cd24SYan, Zheng 
128c530cd24SYan, Zheng static struct dentry *
__dcache_find_get_entry(struct dentry * parent,u64 idx,struct ceph_readdir_cache_control * cache_ctl)129c530cd24SYan, Zheng __dcache_find_get_entry(struct dentry *parent, u64 idx,
130c530cd24SYan, Zheng 			struct ceph_readdir_cache_control *cache_ctl)
131c530cd24SYan, Zheng {
132c530cd24SYan, Zheng 	struct inode *dir = d_inode(parent);
133c530cd24SYan, Zheng 	struct dentry *dentry;
134c530cd24SYan, Zheng 	unsigned idx_mask = (PAGE_SIZE / sizeof(struct dentry *)) - 1;
135c530cd24SYan, Zheng 	loff_t ptr_pos = idx * sizeof(struct dentry *);
136c530cd24SYan, Zheng 	pgoff_t ptr_pgoff = ptr_pos >> PAGE_SHIFT;
137c530cd24SYan, Zheng 
138c530cd24SYan, Zheng 	if (ptr_pos >= i_size_read(dir))
139c530cd24SYan, Zheng 		return NULL;
140c530cd24SYan, Zheng 
141c530cd24SYan, Zheng 	if (!cache_ctl->page || ptr_pgoff != page_index(cache_ctl->page)) {
142c530cd24SYan, Zheng 		ceph_readdir_cache_release(cache_ctl);
143c530cd24SYan, Zheng 		cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff);
144c530cd24SYan, Zheng 		if (!cache_ctl->page) {
145c530cd24SYan, Zheng 			dout(" page %lu not found\n", ptr_pgoff);
146c530cd24SYan, Zheng 			return ERR_PTR(-EAGAIN);
147c530cd24SYan, Zheng 		}
148c530cd24SYan, Zheng 		/* reading/filling the cache are serialized by
149810313c5Shongnanli 		   i_rwsem, no need to use page lock */
150c530cd24SYan, Zheng 		unlock_page(cache_ctl->page);
151c530cd24SYan, Zheng 		cache_ctl->dentries = kmap(cache_ctl->page);
152c530cd24SYan, Zheng 	}
153c530cd24SYan, Zheng 
154c530cd24SYan, Zheng 	cache_ctl->index = idx & idx_mask;
155c530cd24SYan, Zheng 
156c530cd24SYan, Zheng 	rcu_read_lock();
157c530cd24SYan, Zheng 	spin_lock(&parent->d_lock);
158c530cd24SYan, Zheng 	/* check i_size again here, because empty directory can be
159810313c5Shongnanli 	 * marked as complete while not holding the i_rwsem. */
160c530cd24SYan, Zheng 	if (ceph_dir_is_complete_ordered(dir) && ptr_pos < i_size_read(dir))
161c530cd24SYan, Zheng 		dentry = cache_ctl->dentries[cache_ctl->index];
162c530cd24SYan, Zheng 	else
163c530cd24SYan, Zheng 		dentry = NULL;
164c530cd24SYan, Zheng 	spin_unlock(&parent->d_lock);
165c530cd24SYan, Zheng 	if (dentry && !lockref_get_not_dead(&dentry->d_lockref))
166c530cd24SYan, Zheng 		dentry = NULL;
167c530cd24SYan, Zheng 	rcu_read_unlock();
168c530cd24SYan, Zheng 	return dentry ? : ERR_PTR(-EAGAIN);
169c530cd24SYan, Zheng }
170c530cd24SYan, Zheng 
171fdd4e158SYan, Zheng /*
1722817b000SSage Weil  * When possible, we try to satisfy a readdir by peeking at the
1732817b000SSage Weil  * dcache.  We make this work by carefully ordering dentries on
174946e51f2SAl Viro  * d_child when we initially get results back from the MDS, and
1752817b000SSage Weil  * falling back to a "normal" sync readdir if any dentries in the dir
1762817b000SSage Weil  * are dropped.
1772817b000SSage Weil  *
1782f276c51SYan, Zheng  * Complete dir indicates that we have all dentries in the dir.  It is
1792817b000SSage Weil  * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
1802817b000SSage Weil  * the MDS if/when the directory is modified).
1812817b000SSage Weil  */
__dcache_readdir(struct file * file,struct dir_context * ctx,int shared_gen)182a30be7cbSYan, Zheng static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
18397aeb6bfSYan, Zheng 			    int shared_gen)
1842817b000SSage Weil {
185bb48bd4dSChengguang Xu 	struct ceph_dir_file_info *dfi = file->private_data;
186b583043eSAl Viro 	struct dentry *parent = file->f_path.dentry;
1872b0143b5SDavid Howells 	struct inode *dir = d_inode(parent);
188fdd4e158SYan, Zheng 	struct dentry *dentry, *last = NULL;
1892817b000SSage Weil 	struct ceph_dentry_info *di;
190fdd4e158SYan, Zheng 	struct ceph_readdir_cache_control cache_ctl = {};
191c530cd24SYan, Zheng 	u64 idx = 0;
192c530cd24SYan, Zheng 	int err = 0;
1932817b000SSage Weil 
19497aeb6bfSYan, Zheng 	dout("__dcache_readdir %p v%u at %llx\n", dir, (unsigned)shared_gen, ctx->pos);
1952817b000SSage Weil 
196c530cd24SYan, Zheng 	/* search start position */
197c530cd24SYan, Zheng 	if (ctx->pos > 2) {
198c530cd24SYan, Zheng 		u64 count = div_u64(i_size_read(dir), sizeof(struct dentry *));
199c530cd24SYan, Zheng 		while (count > 0) {
200c530cd24SYan, Zheng 			u64 step = count >> 1;
201c530cd24SYan, Zheng 			dentry = __dcache_find_get_entry(parent, idx + step,
202c530cd24SYan, Zheng 							 &cache_ctl);
203c530cd24SYan, Zheng 			if (!dentry) {
204c530cd24SYan, Zheng 				/* use linar search */
205c530cd24SYan, Zheng 				idx = 0;
206c530cd24SYan, Zheng 				break;
207c530cd24SYan, Zheng 			}
208c530cd24SYan, Zheng 			if (IS_ERR(dentry)) {
209c530cd24SYan, Zheng 				err = PTR_ERR(dentry);
210c530cd24SYan, Zheng 				goto out;
211c530cd24SYan, Zheng 			}
212c530cd24SYan, Zheng 			di = ceph_dentry(dentry);
213c530cd24SYan, Zheng 			spin_lock(&dentry->d_lock);
214c530cd24SYan, Zheng 			if (fpos_cmp(di->offset, ctx->pos) < 0) {
215c530cd24SYan, Zheng 				idx += step + 1;
216c530cd24SYan, Zheng 				count -= step + 1;
217c530cd24SYan, Zheng 			} else {
218c530cd24SYan, Zheng 				count = step;
219c530cd24SYan, Zheng 			}
220c530cd24SYan, Zheng 			spin_unlock(&dentry->d_lock);
221c530cd24SYan, Zheng 			dput(dentry);
2222817b000SSage Weil 		}
2232817b000SSage Weil 
224c530cd24SYan, Zheng 		dout("__dcache_readdir %p cache idx %llu\n", dir, idx);
225c530cd24SYan, Zheng 	}
226fdd4e158SYan, Zheng 
227c530cd24SYan, Zheng 
228c530cd24SYan, Zheng 	for (;;) {
229c530cd24SYan, Zheng 		bool emit_dentry = false;
230c530cd24SYan, Zheng 		dentry = __dcache_find_get_entry(parent, idx++, &cache_ctl);
231c530cd24SYan, Zheng 		if (!dentry) {
232bb48bd4dSChengguang Xu 			dfi->file_info.flags |= CEPH_F_ATEND;
233fdd4e158SYan, Zheng 			err = 0;
234fdd4e158SYan, Zheng 			break;
2352817b000SSage Weil 		}
236c530cd24SYan, Zheng 		if (IS_ERR(dentry)) {
237c530cd24SYan, Zheng 			err = PTR_ERR(dentry);
238c530cd24SYan, Zheng 			goto out;
239fdd4e158SYan, Zheng 		}
240fdd4e158SYan, Zheng 
241fdd4e158SYan, Zheng 		spin_lock(&dentry->d_lock);
2425495c2d0SYan, Zheng 		di = ceph_dentry(dentry);
2435495c2d0SYan, Zheng 		if (d_unhashed(dentry) ||
2445495c2d0SYan, Zheng 		    d_really_is_negative(dentry) ||
245af9ffa6dSXiubo Li 		    di->lease_shared_gen != shared_gen ||
246af9ffa6dSXiubo Li 		    ((dentry->d_flags & DCACHE_NOKEY_NAME) &&
247af9ffa6dSXiubo Li 		     fscrypt_has_encryption_key(dir))) {
2485495c2d0SYan, Zheng 			spin_unlock(&dentry->d_lock);
2495495c2d0SYan, Zheng 			dput(dentry);
2505495c2d0SYan, Zheng 			err = -EAGAIN;
2515495c2d0SYan, Zheng 			goto out;
2525495c2d0SYan, Zheng 		}
2535495c2d0SYan, Zheng 		if (fpos_cmp(ctx->pos, di->offset) <= 0) {
25437c4efc1SYan, Zheng 			__ceph_dentry_dir_lease_touch(di);
255fdd4e158SYan, Zheng 			emit_dentry = true;
2562817b000SSage Weil 		}
257b7ab39f6SNick Piggin 		spin_unlock(&dentry->d_lock);
2582817b000SSage Weil 
259fdd4e158SYan, Zheng 		if (emit_dentry) {
260f3c4ebe6SYan, Zheng 			dout(" %llx dentry %p %pd %p\n", di->offset,
2612b0143b5SDavid Howells 			     dentry, dentry, d_inode(dentry));
262fdd4e158SYan, Zheng 			ctx->pos = di->offset;
26377acfa29SAl Viro 			if (!dir_emit(ctx, dentry->d_name.name,
264ebce3eb2SJeff Layton 				      dentry->d_name.len, ceph_present_inode(d_inode(dentry)),
2652b0143b5SDavid Howells 				      d_inode(dentry)->i_mode >> 12)) {
26677acfa29SAl Viro 				dput(dentry);
267fdd4e158SYan, Zheng 				err = 0;
268fdd4e158SYan, Zheng 				break;
26977acfa29SAl Viro 			}
270fdd4e158SYan, Zheng 			ctx->pos++;
2710081bd83SYan, Zheng 
27277acfa29SAl Viro 			if (last)
2732817b000SSage Weil 				dput(last);
274f5b06628SSage Weil 			last = dentry;
275fdd4e158SYan, Zheng 		} else {
276fdd4e158SYan, Zheng 			dput(dentry);
2772817b000SSage Weil 		}
278fdd4e158SYan, Zheng 	}
279c530cd24SYan, Zheng out:
280fdd4e158SYan, Zheng 	ceph_readdir_cache_release(&cache_ctl);
281fdd4e158SYan, Zheng 	if (last) {
282fdd4e158SYan, Zheng 		int ret;
283fdd4e158SYan, Zheng 		di = ceph_dentry(last);
284bb48bd4dSChengguang Xu 		ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len,
285fdd4e158SYan, Zheng 				       fpos_off(di->offset) + 1);
286fdd4e158SYan, Zheng 		if (ret < 0)
287fdd4e158SYan, Zheng 			err = ret;
288fdd4e158SYan, Zheng 		dput(last);
28984583cfbSYan, Zheng 		/* last_name no longer match cache index */
290bb48bd4dSChengguang Xu 		if (dfi->readdir_cache_idx >= 0) {
291bb48bd4dSChengguang Xu 			dfi->readdir_cache_idx = -1;
292bb48bd4dSChengguang Xu 			dfi->dir_release_count = 0;
29384583cfbSYan, Zheng 		}
294fdd4e158SYan, Zheng 	}
295fdd4e158SYan, Zheng 	return err;
2962817b000SSage Weil }
2972817b000SSage Weil 
need_send_readdir(struct ceph_dir_file_info * dfi,loff_t pos)298bb48bd4dSChengguang Xu static bool need_send_readdir(struct ceph_dir_file_info *dfi, loff_t pos)
299f3c4ebe6SYan, Zheng {
300bb48bd4dSChengguang Xu 	if (!dfi->last_readdir)
301f3c4ebe6SYan, Zheng 		return true;
302f3c4ebe6SYan, Zheng 	if (is_hash_order(pos))
303bb48bd4dSChengguang Xu 		return !ceph_frag_contains_value(dfi->frag, fpos_hash(pos));
304f3c4ebe6SYan, Zheng 	else
305bb48bd4dSChengguang Xu 		return dfi->frag != fpos_frag(pos);
306f3c4ebe6SYan, Zheng }
307f3c4ebe6SYan, Zheng 
ceph_readdir(struct file * file,struct dir_context * ctx)30877acfa29SAl Viro static int ceph_readdir(struct file *file, struct dir_context *ctx)
3092817b000SSage Weil {
310bb48bd4dSChengguang Xu 	struct ceph_dir_file_info *dfi = file->private_data;
31177acfa29SAl Viro 	struct inode *inode = file_inode(file);
3122817b000SSage Weil 	struct ceph_inode_info *ci = ceph_inode(inode);
313*985b9ee8SXiubo Li 	struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
3143d14c5d2SYehuda Sadeh 	struct ceph_mds_client *mdsc = fsc->mdsc;
3158974eebdSYan, Zheng 	int i;
3162817b000SSage Weil 	int err;
317b50c2de5SYan, Zheng 	unsigned frag = -1;
3182817b000SSage Weil 	struct ceph_mds_reply_info_parsed *rinfo;
3192817b000SSage Weil 
3208974eebdSYan, Zheng 	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
321bb48bd4dSChengguang Xu 	if (dfi->file_info.flags & CEPH_F_ATEND)
3222817b000SSage Weil 		return 0;
3232817b000SSage Weil 
3242817b000SSage Weil 	/* always start with . and .. */
32577acfa29SAl Viro 	if (ctx->pos == 0) {
3262817b000SSage Weil 		dout("readdir off 0 -> '.'\n");
327ebce3eb2SJeff Layton 		if (!dir_emit(ctx, ".", 1, ceph_present_inode(inode),
32877acfa29SAl Viro 			    inode->i_mode >> 12))
3292817b000SSage Weil 			return 0;
33077acfa29SAl Viro 		ctx->pos = 1;
3312817b000SSage Weil 	}
33277acfa29SAl Viro 	if (ctx->pos == 1) {
333ebce3eb2SJeff Layton 		u64 ino;
334ebce3eb2SJeff Layton 		struct dentry *dentry = file->f_path.dentry;
335ebce3eb2SJeff Layton 
336ebce3eb2SJeff Layton 		spin_lock(&dentry->d_lock);
337ebce3eb2SJeff Layton 		ino = ceph_present_inode(dentry->d_parent->d_inode);
338ebce3eb2SJeff Layton 		spin_unlock(&dentry->d_lock);
339ebce3eb2SJeff Layton 
3402817b000SSage Weil 		dout("readdir off 1 -> '..'\n");
341ebce3eb2SJeff Layton 		if (!dir_emit(ctx, "..", 2, ino, inode->i_mode >> 12))
3422817b000SSage Weil 			return 0;
34377acfa29SAl Viro 		ctx->pos = 2;
3442817b000SSage Weil 	}
3452817b000SSage Weil 
34614e034a6SLuís Henriques 	err = ceph_fscrypt_prepare_readdir(inode);
34714e034a6SLuís Henriques 	if (err < 0)
348af9ffa6dSXiubo Li 		return err;
349af9ffa6dSXiubo Li 
350be655596SSage Weil 	spin_lock(&ci->i_ceph_lock);
351719a2514SYan, Zheng 	/* request Fx cap. if have Fx, we don't need to release Fs cap
352719a2514SYan, Zheng 	 * for later create/unlink. */
353719a2514SYan, Zheng 	__ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_WR);
354719a2514SYan, Zheng 	/* can we use the dcache? */
355fdd4e158SYan, Zheng 	if (ceph_test_mount_opt(fsc, DCACHE) &&
3563d14c5d2SYehuda Sadeh 	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
357a0dff78dSSage Weil 	    ceph_snap(inode) != CEPH_SNAPDIR &&
35870db4f36SYan, Zheng 	    __ceph_dir_is_complete_ordered(ci) &&
3591af16d54SXiubo Li 	    __ceph_caps_issued_mask_metric(ci, CEPH_CAP_FILE_SHARED, 1)) {
36097aeb6bfSYan, Zheng 		int shared_gen = atomic_read(&ci->i_shared_gen);
3611af16d54SXiubo Li 
362be655596SSage Weil 		spin_unlock(&ci->i_ceph_lock);
363a30be7cbSYan, Zheng 		err = __dcache_readdir(file, ctx, shared_gen);
364efa4c120SSage Weil 		if (err != -EAGAIN)
3652817b000SSage Weil 			return err;
366efa4c120SSage Weil 	} else {
367be655596SSage Weil 		spin_unlock(&ci->i_ceph_lock);
368efa4c120SSage Weil 	}
3692817b000SSage Weil 
3702817b000SSage Weil 	/* proceed with a normal readdir */
3712817b000SSage Weil more:
3722817b000SSage Weil 	/* do we have the correct frag content buffered? */
373bb48bd4dSChengguang Xu 	if (need_send_readdir(dfi, ctx->pos)) {
3742817b000SSage Weil 		struct ceph_mds_request *req;
3752817b000SSage Weil 		int op = ceph_snap(inode) == CEPH_SNAPDIR ?
3762817b000SSage Weil 			CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
3772817b000SSage Weil 
3782817b000SSage Weil 		/* discard old result, if any */
379bb48bd4dSChengguang Xu 		if (dfi->last_readdir) {
380bb48bd4dSChengguang Xu 			ceph_mdsc_put_request(dfi->last_readdir);
381bb48bd4dSChengguang Xu 			dfi->last_readdir = NULL;
382393f6620SSage Weil 		}
3832817b000SSage Weil 
384f3c4ebe6SYan, Zheng 		if (is_hash_order(ctx->pos)) {
385b50c2de5SYan, Zheng 			/* fragtree isn't always accurate. choose frag
386b50c2de5SYan, Zheng 			 * based on previous reply when possible. */
387b50c2de5SYan, Zheng 			if (frag == (unsigned)-1)
388f3c4ebe6SYan, Zheng 				frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
389f3c4ebe6SYan, Zheng 							NULL, NULL);
390f3c4ebe6SYan, Zheng 		} else {
391f3c4ebe6SYan, Zheng 			frag = fpos_frag(ctx->pos);
392f3c4ebe6SYan, Zheng 		}
393f3c4ebe6SYan, Zheng 
3942817b000SSage Weil 		dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
395bb48bd4dSChengguang Xu 		     ceph_vinop(inode), frag, dfi->last_name);
3962817b000SSage Weil 		req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
3972817b000SSage Weil 		if (IS_ERR(req))
3982817b000SSage Weil 			return PTR_ERR(req);
399af9ffa6dSXiubo Li 
40054008399SYan, Zheng 		err = ceph_alloc_readdir_reply_buffer(req, inode);
40154008399SYan, Zheng 		if (err) {
40254008399SYan, Zheng 			ceph_mdsc_put_request(req);
40354008399SYan, Zheng 			return err;
40454008399SYan, Zheng 		}
4052817b000SSage Weil 		/* hints to request -> mds selection code */
4062817b000SSage Weil 		req->r_direct_mode = USE_AUTH_MDS;
4075d37ca14SYan, Zheng 		if (op == CEPH_MDS_OP_READDIR) {
4082817b000SSage Weil 			req->r_direct_hash = ceph_frag_value(frag);
409bc2de10dSJeff Layton 			__set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
41087c91a96SYan, Zheng 			req->r_inode_drop = CEPH_CAP_FILE_EXCL;
4115d37ca14SYan, Zheng 		}
412bb48bd4dSChengguang Xu 		if (dfi->last_name) {
413af9ffa6dSXiubo Li 			struct qstr d_name = { .name = dfi->last_name,
414af9ffa6dSXiubo Li 					       .len = strlen(dfi->last_name) };
415af9ffa6dSXiubo Li 
416af9ffa6dSXiubo Li 			req->r_path2 = kzalloc(NAME_MAX + 1, GFP_KERNEL);
417a149bb9aSSanidhya Kashyap 			if (!req->r_path2) {
418a149bb9aSSanidhya Kashyap 				ceph_mdsc_put_request(req);
419a149bb9aSSanidhya Kashyap 				return -ENOMEM;
420a149bb9aSSanidhya Kashyap 			}
421af9ffa6dSXiubo Li 
422af9ffa6dSXiubo Li 			err = ceph_encode_encrypted_dname(inode, &d_name,
423af9ffa6dSXiubo Li 							  req->r_path2);
424af9ffa6dSXiubo Li 			if (err < 0) {
425af9ffa6dSXiubo Li 				ceph_mdsc_put_request(req);
426af9ffa6dSXiubo Li 				return err;
427af9ffa6dSXiubo Li 			}
42879162547SYan, Zheng 		} else if (is_hash_order(ctx->pos)) {
42979162547SYan, Zheng 			req->r_args.readdir.offset_hash =
43079162547SYan, Zheng 				cpu_to_le32(fpos_hash(ctx->pos));
431a149bb9aSSanidhya Kashyap 		}
43279162547SYan, Zheng 
433bb48bd4dSChengguang Xu 		req->r_dir_release_cnt = dfi->dir_release_count;
434bb48bd4dSChengguang Xu 		req->r_dir_ordered_cnt = dfi->dir_ordered_count;
435bb48bd4dSChengguang Xu 		req->r_readdir_cache_idx = dfi->readdir_cache_idx;
436bb48bd4dSChengguang Xu 		req->r_readdir_offset = dfi->next_offset;
4372817b000SSage Weil 		req->r_args.readdir.frag = cpu_to_le32(frag);
438956d39d6SYan, Zheng 		req->r_args.readdir.flags =
439956d39d6SYan, Zheng 				cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
440a149bb9aSSanidhya Kashyap 
441a149bb9aSSanidhya Kashyap 		req->r_inode = inode;
442a149bb9aSSanidhya Kashyap 		ihold(inode);
443a149bb9aSSanidhya Kashyap 		req->r_dentry = dget(file->f_path.dentry);
4442817b000SSage Weil 		err = ceph_mdsc_do_request(mdsc, NULL, req);
4452817b000SSage Weil 		if (err < 0) {
4462817b000SSage Weil 			ceph_mdsc_put_request(req);
4472817b000SSage Weil 			return err;
4482817b000SSage Weil 		}
449f3c4ebe6SYan, Zheng 		dout("readdir got and parsed readdir result=%d on "
450f3c4ebe6SYan, Zheng 		     "frag %x, end=%d, complete=%d, hash_order=%d\n",
451f3c4ebe6SYan, Zheng 		     err, frag,
4522817b000SSage Weil 		     (int)req->r_reply_info.dir_end,
453f3c4ebe6SYan, Zheng 		     (int)req->r_reply_info.dir_complete,
454f3c4ebe6SYan, Zheng 		     (int)req->r_reply_info.hash_order);
4552817b000SSage Weil 
45681c6aea5SYan, Zheng 		rinfo = &req->r_reply_info;
45781c6aea5SYan, Zheng 		if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
45881c6aea5SYan, Zheng 			frag = le32_to_cpu(rinfo->dir_dir->frag);
459f3c4ebe6SYan, Zheng 			if (!rinfo->hash_order) {
460bb48bd4dSChengguang Xu 				dfi->next_offset = req->r_readdir_offset;
4618974eebdSYan, Zheng 				/* adjust ctx->pos to beginning of frag */
462f3c4ebe6SYan, Zheng 				ctx->pos = ceph_make_fpos(frag,
463bb48bd4dSChengguang Xu 							  dfi->next_offset,
464f3c4ebe6SYan, Zheng 							  false);
465f3c4ebe6SYan, Zheng 			}
46681c6aea5SYan, Zheng 		}
467fdd4e158SYan, Zheng 
468bb48bd4dSChengguang Xu 		dfi->frag = frag;
469bb48bd4dSChengguang Xu 		dfi->last_readdir = req;
4702817b000SSage Weil 
471bc2de10dSJeff Layton 		if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
472bb48bd4dSChengguang Xu 			dfi->readdir_cache_idx = req->r_readdir_cache_idx;
473bb48bd4dSChengguang Xu 			if (dfi->readdir_cache_idx < 0) {
474fdd4e158SYan, Zheng 				/* preclude from marking dir ordered */
475bb48bd4dSChengguang Xu 				dfi->dir_ordered_count = 0;
4768974eebdSYan, Zheng 			} else if (ceph_frag_is_leftmost(frag) &&
477bb48bd4dSChengguang Xu 				   dfi->next_offset == 2) {
478fdd4e158SYan, Zheng 				/* note dir version at start of readdir so
479fdd4e158SYan, Zheng 				 * we can tell if any dentries get dropped */
480bb48bd4dSChengguang Xu 				dfi->dir_release_count = req->r_dir_release_cnt;
481bb48bd4dSChengguang Xu 				dfi->dir_ordered_count = req->r_dir_ordered_cnt;
482fdd4e158SYan, Zheng 			}
483fdd4e158SYan, Zheng 		} else {
4844c069a58SChengguang Xu 			dout("readdir !did_prepopulate\n");
485fdd4e158SYan, Zheng 			/* disable readdir cache */
486bb48bd4dSChengguang Xu 			dfi->readdir_cache_idx = -1;
487fdd4e158SYan, Zheng 			/* preclude from marking dir complete */
488bb48bd4dSChengguang Xu 			dfi->dir_release_count = 0;
489fdd4e158SYan, Zheng 		}
490fdd4e158SYan, Zheng 
491f3c4ebe6SYan, Zheng 		/* note next offset and last dentry name */
492f3c4ebe6SYan, Zheng 		if (rinfo->dir_nr > 0) {
4932a5beea3SYan, Zheng 			struct ceph_mds_reply_dir_entry *rde =
4942a5beea3SYan, Zheng 					rinfo->dir_entries + (rinfo->dir_nr-1);
495f3c4ebe6SYan, Zheng 			unsigned next_offset = req->r_reply_info.dir_end ?
496f3c4ebe6SYan, Zheng 					2 : (fpos_off(rde->offset) + 1);
497bb48bd4dSChengguang Xu 			err = note_last_dentry(dfi, rde->name, rde->name_len,
498f3c4ebe6SYan, Zheng 					       next_offset);
499f639d986SXiubo Li 			if (err) {
500f639d986SXiubo Li 				ceph_mdsc_put_request(dfi->last_readdir);
501f639d986SXiubo Li 				dfi->last_readdir = NULL;
5022817b000SSage Weil 				return err;
503f639d986SXiubo Li 			}
504f3c4ebe6SYan, Zheng 		} else if (req->r_reply_info.dir_end) {
505bb48bd4dSChengguang Xu 			dfi->next_offset = 2;
506f3c4ebe6SYan, Zheng 			/* keep last name */
5072817b000SSage Weil 		}
5082817b000SSage Weil 	}
5092817b000SSage Weil 
510bb48bd4dSChengguang Xu 	rinfo = &dfi->last_readdir->r_reply_info;
5118974eebdSYan, Zheng 	dout("readdir frag %x num %d pos %llx chunk first %llx\n",
512bb48bd4dSChengguang Xu 	     dfi->frag, rinfo->dir_nr, ctx->pos,
5138974eebdSYan, Zheng 	     rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL);
51477acfa29SAl Viro 
5158974eebdSYan, Zheng 	i = 0;
5168974eebdSYan, Zheng 	/* search start position */
5178974eebdSYan, Zheng 	if (rinfo->dir_nr > 0) {
5188974eebdSYan, Zheng 		int step, nr = rinfo->dir_nr;
5198974eebdSYan, Zheng 		while (nr > 0) {
5208974eebdSYan, Zheng 			step = nr >> 1;
5218974eebdSYan, Zheng 			if (rinfo->dir_entries[i + step].offset < ctx->pos) {
5228974eebdSYan, Zheng 				i +=  step + 1;
5238974eebdSYan, Zheng 				nr -= step + 1;
5248974eebdSYan, Zheng 			} else {
5258974eebdSYan, Zheng 				nr = step;
5268974eebdSYan, Zheng 			}
5278974eebdSYan, Zheng 		}
5288974eebdSYan, Zheng 	}
5298974eebdSYan, Zheng 	for (; i < rinfo->dir_nr; i++) {
5308974eebdSYan, Zheng 		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
5313105c19cSSage Weil 
532af9ffa6dSXiubo Li 		if (rde->offset < ctx->pos) {
533af9ffa6dSXiubo Li 			pr_warn("%s: rde->offset 0x%llx ctx->pos 0x%llx\n",
534af9ffa6dSXiubo Li 				__func__, rde->offset, ctx->pos);
535af9ffa6dSXiubo Li 			return -EIO;
536af9ffa6dSXiubo Li 		}
537af9ffa6dSXiubo Li 
538af9ffa6dSXiubo Li 		if (WARN_ON_ONCE(!rde->inode.in))
539af9ffa6dSXiubo Li 			return -EIO;
5408974eebdSYan, Zheng 
5418974eebdSYan, Zheng 		ctx->pos = rde->offset;
5428974eebdSYan, Zheng 		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
5438974eebdSYan, Zheng 		     i, rinfo->dir_nr, ctx->pos,
5442a5beea3SYan, Zheng 		     rde->name_len, rde->name, &rde->inode.in);
5458974eebdSYan, Zheng 
5462a5beea3SYan, Zheng 		if (!dir_emit(ctx, rde->name, rde->name_len,
547ebce3eb2SJeff Layton 			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
548ebce3eb2SJeff Layton 			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
549f639d986SXiubo Li 			/*
550f639d986SXiubo Li 			 * NOTE: Here no need to put the 'dfi->last_readdir',
551f639d986SXiubo Li 			 * because when dir_emit stops us it's most likely
552f639d986SXiubo Li 			 * doesn't have enough memory, etc. So for next readdir
553f639d986SXiubo Li 			 * it will continue.
554f639d986SXiubo Li 			 */
5552817b000SSage Weil 			dout("filldir stopping us...\n");
5562817b000SSage Weil 			return 0;
5572817b000SSage Weil 		}
558af9ffa6dSXiubo Li 
559af9ffa6dSXiubo Li 		/* Reset the lengths to their original allocated vals */
56077acfa29SAl Viro 		ctx->pos++;
5612817b000SSage Weil 	}
5622817b000SSage Weil 
563bb48bd4dSChengguang Xu 	ceph_mdsc_put_request(dfi->last_readdir);
564bb48bd4dSChengguang Xu 	dfi->last_readdir = NULL;
565b50c2de5SYan, Zheng 
566bb48bd4dSChengguang Xu 	if (dfi->next_offset > 2) {
567bb48bd4dSChengguang Xu 		frag = dfi->frag;
5682817b000SSage Weil 		goto more;
5692817b000SSage Weil 	}
5702817b000SSage Weil 
5712817b000SSage Weil 	/* more frags? */
572bb48bd4dSChengguang Xu 	if (!ceph_frag_is_rightmost(dfi->frag)) {
573bb48bd4dSChengguang Xu 		frag = ceph_frag_next(dfi->frag);
574f3c4ebe6SYan, Zheng 		if (is_hash_order(ctx->pos)) {
575f3c4ebe6SYan, Zheng 			loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
576bb48bd4dSChengguang Xu 							dfi->next_offset, true);
577f3c4ebe6SYan, Zheng 			if (new_pos > ctx->pos)
578f3c4ebe6SYan, Zheng 				ctx->pos = new_pos;
579f3c4ebe6SYan, Zheng 			/* keep last_name */
580f3c4ebe6SYan, Zheng 		} else {
581bb48bd4dSChengguang Xu 			ctx->pos = ceph_make_fpos(frag, dfi->next_offset,
582bb48bd4dSChengguang Xu 							false);
583bb48bd4dSChengguang Xu 			kfree(dfi->last_name);
584bb48bd4dSChengguang Xu 			dfi->last_name = NULL;
585f3c4ebe6SYan, Zheng 		}
5862817b000SSage Weil 		dout("readdir next frag is %x\n", frag);
5872817b000SSage Weil 		goto more;
5882817b000SSage Weil 	}
589bb48bd4dSChengguang Xu 	dfi->file_info.flags |= CEPH_F_ATEND;
5902817b000SSage Weil 
5912817b000SSage Weil 	/*
5922817b000SSage Weil 	 * if dir_release_count still matches the dir, no dentries
5932817b000SSage Weil 	 * were released during the whole readdir, and we should have
5942817b000SSage Weil 	 * the complete dir contents in our cache.
5952817b000SSage Weil 	 */
596bb48bd4dSChengguang Xu 	if (atomic64_read(&ci->i_release_count) ==
597bb48bd4dSChengguang Xu 					dfi->dir_release_count) {
598be655596SSage Weil 		spin_lock(&ci->i_ceph_lock);
599bb48bd4dSChengguang Xu 		if (dfi->dir_ordered_count ==
600bb48bd4dSChengguang Xu 				atomic64_read(&ci->i_ordered_count)) {
60170db4f36SYan, Zheng 			dout(" marking %p complete and ordered\n", inode);
602fdd4e158SYan, Zheng 			/* use i_size to track number of entries in
603fdd4e158SYan, Zheng 			 * readdir cache */
604bb48bd4dSChengguang Xu 			BUG_ON(dfi->readdir_cache_idx < 0);
605bb48bd4dSChengguang Xu 			i_size_write(inode, dfi->readdir_cache_idx *
606fdd4e158SYan, Zheng 				     sizeof(struct dentry*));
607fdd4e158SYan, Zheng 		} else {
608a8673d61SYan, Zheng 			dout(" marking %p complete\n", inode);
609fdd4e158SYan, Zheng 		}
610bb48bd4dSChengguang Xu 		__ceph_dir_set_complete(ci, dfi->dir_release_count,
611bb48bd4dSChengguang Xu 					dfi->dir_ordered_count);
612be655596SSage Weil 		spin_unlock(&ci->i_ceph_lock);
613fdd4e158SYan, Zheng 	}
61477acfa29SAl Viro 	dout("readdir %p file %p done.\n", inode, file);
6152817b000SSage Weil 	return 0;
6162817b000SSage Weil }
6172817b000SSage Weil 
reset_readdir(struct ceph_dir_file_info * dfi)618bb48bd4dSChengguang Xu static void reset_readdir(struct ceph_dir_file_info *dfi)
6192817b000SSage Weil {
620bb48bd4dSChengguang Xu 	if (dfi->last_readdir) {
621bb48bd4dSChengguang Xu 		ceph_mdsc_put_request(dfi->last_readdir);
622bb48bd4dSChengguang Xu 		dfi->last_readdir = NULL;
6232817b000SSage Weil 	}
624bb48bd4dSChengguang Xu 	kfree(dfi->last_name);
625bb48bd4dSChengguang Xu 	dfi->last_name = NULL;
626bb48bd4dSChengguang Xu 	dfi->dir_release_count = 0;
627bb48bd4dSChengguang Xu 	dfi->readdir_cache_idx = -1;
628bb48bd4dSChengguang Xu 	dfi->next_offset = 2;  /* compensate for . and .. */
629bb48bd4dSChengguang Xu 	dfi->file_info.flags &= ~CEPH_F_ATEND;
6302817b000SSage Weil }
6312817b000SSage Weil 
6328974eebdSYan, Zheng /*
6338974eebdSYan, Zheng  * discard buffered readdir content on seekdir(0), or seek to new frag,
6348974eebdSYan, Zheng  * or seek prior to current chunk
6358974eebdSYan, Zheng  */
need_reset_readdir(struct ceph_dir_file_info * dfi,loff_t new_pos)636bb48bd4dSChengguang Xu static bool need_reset_readdir(struct ceph_dir_file_info *dfi, loff_t new_pos)
6378974eebdSYan, Zheng {
6388974eebdSYan, Zheng 	struct ceph_mds_reply_info_parsed *rinfo;
639f3c4ebe6SYan, Zheng 	loff_t chunk_offset;
6408974eebdSYan, Zheng 	if (new_pos == 0)
6418974eebdSYan, Zheng 		return true;
642f3c4ebe6SYan, Zheng 	if (is_hash_order(new_pos)) {
643f3c4ebe6SYan, Zheng 		/* no need to reset last_name for a forward seek when
644f3c4ebe6SYan, Zheng 		 * dentries are sotred in hash order */
645bb48bd4dSChengguang Xu 	} else if (dfi->frag != fpos_frag(new_pos)) {
6468974eebdSYan, Zheng 		return true;
647f3c4ebe6SYan, Zheng 	}
648bb48bd4dSChengguang Xu 	rinfo = dfi->last_readdir ? &dfi->last_readdir->r_reply_info : NULL;
6498974eebdSYan, Zheng 	if (!rinfo || !rinfo->dir_nr)
6508974eebdSYan, Zheng 		return true;
651f3c4ebe6SYan, Zheng 	chunk_offset = rinfo->dir_entries[0].offset;
652f3c4ebe6SYan, Zheng 	return new_pos < chunk_offset ||
653f3c4ebe6SYan, Zheng 	       is_hash_order(new_pos) != is_hash_order(chunk_offset);
6548974eebdSYan, Zheng }
6558974eebdSYan, Zheng 
/*
 * llseek for directories.
 *
 * SEEK_SET and SEEK_CUR are handled; SEEK_END yields -EOPNOTSUPP and any
 * other whence (or a negative resulting offset) yields -EINVAL.  On
 * success the new position is returned.  The whole operation runs under
 * the inode lock.
 */
static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
{
	struct ceph_dir_file_info *dfi = file->private_data;
	struct inode *inode = file->f_mapping->host;
	loff_t ret = -EINVAL;

	inode_lock(inode);

	if (whence == SEEK_END) {
		ret = -EOPNOTSUPP;
		goto unlock;
	}
	if (whence == SEEK_CUR)
		offset += file->f_pos;
	else if (whence != SEEK_SET)
		goto unlock;

	if (offset < 0)
		goto unlock;

	if (need_reset_readdir(dfi, offset)) {
		dout("dir_llseek dropping %p content\n", file);
		reset_readdir(dfi);
	} else if (is_hash_order(offset) && offset > file->f_pos) {
		/*
		 * With hash offsets we cannot tell whether a forward
		 * seek stays within the same frag, so stop tracking the
		 * readdir cache for this pass.
		 */
		dfi->dir_release_count = 0;
		dfi->readdir_cache_idx = -1;
	}

	if (offset != file->f_pos) {
		file->f_pos = offset;
		file->f_version = 0;
		dfi->file_info.flags &= ~CEPH_F_ATEND;
	}
	ret = offset;

unlock:
	inode_unlock(inode);
	return ret;
}
6992817b000SSage Weil 
7002817b000SSage Weil /*
701468640e3SSage Weil  * Handle lookups for the hidden .snap directory.
7022817b000SSage Weil  */
ceph_handle_snapdir(struct ceph_mds_request * req,struct dentry * dentry)703aa60cfc3SJeff Layton struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req,
7047a971e2cSJeff Layton 				   struct dentry *dentry)
7052817b000SSage Weil {
706*985b9ee8SXiubo Li 	struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb);
707810313c5Shongnanli 	struct inode *parent = d_inode(dentry->d_parent); /* we hold i_rwsem */
7082817b000SSage Weil 
7092817b000SSage Weil 	/* .snap dir? */
7107a971e2cSJeff Layton 	if (ceph_snap(parent) == CEPH_NOSNAP &&
711aa60cfc3SJeff Layton 	    strcmp(dentry->d_name.name, fsc->mount_options->snapdir_name) == 0) {
712aa60cfc3SJeff Layton 		struct dentry *res;
7132817b000SSage Weil 		struct inode *inode = ceph_get_snapdir(parent);
714aa60cfc3SJeff Layton 
715aa60cfc3SJeff Layton 		res = d_splice_alias(inode, dentry);
716aa60cfc3SJeff Layton 		dout("ENOENT on snapdir %p '%pd', linking to snapdir %p. Spliced dentry %p\n",
717aa60cfc3SJeff Layton 		     dentry, dentry, inode, res);
718aa60cfc3SJeff Layton 		if (res)
719aa60cfc3SJeff Layton 			dentry = res;
7202817b000SSage Weil 	}
721aa60cfc3SJeff Layton 	return dentry;
722468640e3SSage Weil }
7232817b000SSage Weil 
724468640e3SSage Weil /*
725468640e3SSage Weil  * Figure out final result of a lookup/open request.
726468640e3SSage Weil  *
727468640e3SSage Weil  * Mainly, make sure we return the final req->r_dentry (if it already
728468640e3SSage Weil  * existed) in place of the original VFS-provided dentry when they
729468640e3SSage Weil  * differ.
730468640e3SSage Weil  *
731468640e3SSage Weil  * Gracefully handle the case where the MDS replies with -ENOENT and
732468640e3SSage Weil  * no trace (which it may do, at its discretion, e.g., if it doesn't
733468640e3SSage Weil  * care to issue a lease on the negative dentry).
734468640e3SSage Weil  */
ceph_finish_lookup(struct ceph_mds_request * req,struct dentry * dentry,int err)735468640e3SSage Weil struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
736468640e3SSage Weil 				  struct dentry *dentry, int err)
737468640e3SSage Weil {
7382817b000SSage Weil 	if (err == -ENOENT) {
7392817b000SSage Weil 		/* no trace? */
7402817b000SSage Weil 		err = 0;
7412817b000SSage Weil 		if (!req->r_reply_info.head->is_dentry) {
7422817b000SSage Weil 			dout("ENOENT and no trace, dentry %p inode %p\n",
7432b0143b5SDavid Howells 			     dentry, d_inode(dentry));
7442b0143b5SDavid Howells 			if (d_really_is_positive(dentry)) {
7452817b000SSage Weil 				d_drop(dentry);
7462817b000SSage Weil 				err = -ENOENT;
7472817b000SSage Weil 			} else {
7482817b000SSage Weil 				d_add(dentry, NULL);
7492817b000SSage Weil 			}
7502817b000SSage Weil 		}
7512817b000SSage Weil 	}
7522817b000SSage Weil 	if (err)
7532817b000SSage Weil 		dentry = ERR_PTR(err);
7542817b000SSage Weil 	else if (dentry != req->r_dentry)
7552817b000SSage Weil 		dentry = dget(req->r_dentry);   /* we got spliced */
7562817b000SSage Weil 	else
7572817b000SSage Weil 		dentry = NULL;
7582817b000SSage Weil 	return dentry;
7592817b000SSage Weil }
7602817b000SSage Weil 
is_root_ceph_dentry(struct inode * inode,struct dentry * dentry)7613b33f692SZhang Zhuoyu static bool is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
7621d1de916SSage Weil {
7631d1de916SSage Weil 	return ceph_ino(inode) == CEPH_INO_ROOT &&
7641d1de916SSage Weil 		strncmp(dentry->d_name.name, ".ceph", 5) == 0;
7651d1de916SSage Weil }
7661d1de916SSage Weil 
7672817b000SSage Weil /*
7682817b000SSage Weil  * Look up a single dir entry.  If there is a lookup intent, inform
7692817b000SSage Weil  * the MDS so that it gets our 'caps wanted' value in a single op.
7702817b000SSage Weil  */
ceph_lookup(struct inode * dir,struct dentry * dentry,unsigned int flags)7712817b000SSage Weil static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
77200cd8dd3SAl Viro 				  unsigned int flags)
7732817b000SSage Weil {
774*985b9ee8SXiubo Li 	struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb);
7752678da88SXiubo Li 	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
7762817b000SSage Weil 	struct ceph_mds_request *req;
7772817b000SSage Weil 	int op;
778315f2408SYan, Zheng 	int mask;
7792817b000SSage Weil 	int err;
7802817b000SSage Weil 
781a455589fSAl Viro 	dout("lookup %p dentry %p '%pd'\n",
782a455589fSAl Viro 	     dir, dentry, dentry);
7832817b000SSage Weil 
7842817b000SSage Weil 	if (dentry->d_name.len > NAME_MAX)
7852817b000SSage Weil 		return ERR_PTR(-ENAMETOOLONG);
7862817b000SSage Weil 
787cb3524a8SJeff Layton 	if (IS_ENCRYPTED(dir)) {
788d9ae977dSLuís Henriques 		bool had_key = fscrypt_has_encryption_key(dir);
789d9ae977dSLuís Henriques 
790d9ae977dSLuís Henriques 		err = fscrypt_prepare_lookup_partial(dir, dentry);
79114e034a6SLuís Henriques 		if (err < 0)
792cb3524a8SJeff Layton 			return ERR_PTR(err);
793d9ae977dSLuís Henriques 
794d9ae977dSLuís Henriques 		/* mark directory as incomplete if it has been unlocked */
795d9ae977dSLuís Henriques 		if (!had_key && fscrypt_has_encryption_key(dir))
796d9ae977dSLuís Henriques 			ceph_dir_clear_complete(dir);
797cb3524a8SJeff Layton 	}
798cb3524a8SJeff Layton 
7992817b000SSage Weil 	/* can we conclude ENOENT locally? */
8002b0143b5SDavid Howells 	if (d_really_is_negative(dentry)) {
8012817b000SSage Weil 		struct ceph_inode_info *ci = ceph_inode(dir);
8022817b000SSage Weil 		struct ceph_dentry_info *di = ceph_dentry(dentry);
8032817b000SSage Weil 
804be655596SSage Weil 		spin_lock(&ci->i_ceph_lock);
805891f3f5aSJeff Layton 		dout(" dir %p flags are 0x%lx\n", dir, ci->i_ceph_flags);
8062817b000SSage Weil 		if (strncmp(dentry->d_name.name,
8073d14c5d2SYehuda Sadeh 			    fsc->mount_options->snapdir_name,
8082817b000SSage Weil 			    dentry->d_name.len) &&
8091d1de916SSage Weil 		    !is_root_ceph_dentry(dir, dentry) &&
810e2c3de04SYan, Zheng 		    ceph_test_mount_opt(fsc, DCACHE) &&
8112f276c51SYan, Zheng 		    __ceph_dir_is_complete(ci) &&
8121af16d54SXiubo Li 		    __ceph_caps_issued_mask_metric(ci, CEPH_CAP_FILE_SHARED, 1)) {
813719a2514SYan, Zheng 			__ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD);
814be655596SSage Weil 			spin_unlock(&ci->i_ceph_lock);
8152817b000SSage Weil 			dout(" dir %p complete, -ENOENT\n", dir);
8162817b000SSage Weil 			d_add(dentry, NULL);
81797aeb6bfSYan, Zheng 			di->lease_shared_gen = atomic_read(&ci->i_shared_gen);
8182817b000SSage Weil 			return NULL;
8192817b000SSage Weil 		}
820be655596SSage Weil 		spin_unlock(&ci->i_ceph_lock);
8212817b000SSage Weil 	}
8222817b000SSage Weil 
8232817b000SSage Weil 	op = ceph_snap(dir) == CEPH_SNAPDIR ?
8242817b000SSage Weil 		CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
8252817b000SSage Weil 	req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
8262817b000SSage Weil 	if (IS_ERR(req))
8277e34bc52SJulia Lawall 		return ERR_CAST(req);
8282817b000SSage Weil 	req->r_dentry = dget(dentry);
8292817b000SSage Weil 	req->r_num_caps = 2;
830315f2408SYan, Zheng 
831315f2408SYan, Zheng 	mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
832315f2408SYan, Zheng 	if (ceph_security_xattr_wanted(dir))
833315f2408SYan, Zheng 		mask |= CEPH_CAP_XATTR_SHARED;
834315f2408SYan, Zheng 	req->r_args.getattr.mask = cpu_to_le32(mask);
835315f2408SYan, Zheng 
8364c183472SJeff Layton 	ihold(dir);
8373dd69aabSJeff Layton 	req->r_parent = dir;
8383dd69aabSJeff Layton 	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
8392817b000SSage Weil 	err = ceph_mdsc_do_request(mdsc, NULL, req);
8407a971e2cSJeff Layton 	if (err == -ENOENT) {
8417a971e2cSJeff Layton 		struct dentry *res;
8427a971e2cSJeff Layton 
8437a971e2cSJeff Layton 		res = ceph_handle_snapdir(req, dentry);
844aa60cfc3SJeff Layton 		if (IS_ERR(res)) {
845aa60cfc3SJeff Layton 			err = PTR_ERR(res);
846aa60cfc3SJeff Layton 		} else {
847aa60cfc3SJeff Layton 			dentry = res;
848aa60cfc3SJeff Layton 			err = 0;
849aa60cfc3SJeff Layton 		}
8507a971e2cSJeff Layton 	}
8512817b000SSage Weil 	dentry = ceph_finish_lookup(req, dentry, err);
8522817b000SSage Weil 	ceph_mdsc_put_request(req);  /* will dput(dentry) */
8532817b000SSage Weil 	dout("lookup result=%p\n", dentry);
8542817b000SSage Weil 	return dentry;
8552817b000SSage Weil }
8562817b000SSage Weil 
8572817b000SSage Weil /*
8582817b000SSage Weil  * If we do a create but get no trace back from the MDS, follow up with
8592817b000SSage Weil  * a lookup (the VFS expects us to link up the provided dentry).
8602817b000SSage Weil  */
ceph_handle_notrace_create(struct inode * dir,struct dentry * dentry)8612817b000SSage Weil int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
8622817b000SSage Weil {
86300cd8dd3SAl Viro 	struct dentry *result = ceph_lookup(dir, dentry, 0);
8642817b000SSage Weil 
8652817b000SSage Weil 	if (result && !IS_ERR(result)) {
8662817b000SSage Weil 		/*
8672817b000SSage Weil 		 * We created the item, then did a lookup, and found
8682817b000SSage Weil 		 * it was already linked to another inode we already
8694d41cef2SYan, Zheng 		 * had in our cache (and thus got spliced). To not
8704d41cef2SYan, Zheng 		 * confuse VFS (especially when inode is a directory),
8714d41cef2SYan, Zheng 		 * we don't link our dentry to that inode, return an
8724d41cef2SYan, Zheng 		 * error instead.
8734d41cef2SYan, Zheng 		 *
8744d41cef2SYan, Zheng 		 * This event should be rare and it happens only when
8754d41cef2SYan, Zheng 		 * we talk to old MDS. Recent MDS does not send traceless
8764d41cef2SYan, Zheng 		 * reply for request that creates new inode.
8772817b000SSage Weil 		 */
8785cba372cSYan, Zheng 		d_drop(result);
8794d41cef2SYan, Zheng 		return -ESTALE;
8802817b000SSage Weil 	}
8812817b000SSage Weil 	return PTR_ERR(result);
8822817b000SSage Weil }
8832817b000SSage Weil 
/*
 * Create a node named by dentry in dir via a CEPH_MDS_OP_MKNOD request.
 *
 * Fails with -EROFS when dir is not in the live (CEPH_NOSNAP) namespace
 * and with -EDQUOT when the max-files quota is exceeded.  If the MDS
 * reply has no dentry trace, a follow-up lookup links the dentry.  On
 * any failure after the conflict wait the dentry is dropped; on success
 * the ACLs gathered in the security context are applied to the inode.
 */
static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
		      struct dentry *dentry, umode_t mode, dev_t rdev)
{
	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
	struct ceph_acl_sec_ctx as_ctx = {};
	struct ceph_mds_request *req;
	int err;

	if (ceph_snap(dir) != CEPH_NOSNAP)
		return -EROFS;

	/* let any pending async unlink of the same name finish first */
	err = ceph_wait_on_conflict_unlink(dentry);
	if (err)
		return err;

	if (ceph_quota_is_max_files_exceeded(dir)) {
		err = -EDQUOT;
		goto finish;
	}

	dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
	     dir, dentry, mode, rdev);
	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto finish;
	}

	req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
	if (IS_ERR(req->r_new_inode)) {
		err = PTR_ERR(req->r_new_inode);
		/* never leave an ERR_PTR in the request */
		req->r_new_inode = NULL;
		goto put_req;
	}

	if (S_ISREG(mode) && IS_ENCRYPTED(dir))
		set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);

	req->r_dentry = dget(dentry);
	req->r_num_caps = 2;

	/* the request holds its own reference on the parent */
	req->r_parent = dir;
	ihold(dir);
	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);

	req->r_args.mknod.mode = cpu_to_le32(mode);
	req->r_args.mknod.rdev = cpu_to_le32(rdev);
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
			     CEPH_CAP_XATTR_EXCL;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;

	ceph_as_ctx_to_req(req, &as_ctx);

	err = ceph_mdsc_do_request(mdsc, dir, req);
	if (!err && !req->r_reply_info.head->is_dentry)
		err = ceph_handle_notrace_create(dir, dentry);

put_req:
	ceph_mdsc_put_request(req);
finish:
	if (err)
		d_drop(dentry);
	else
		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
	ceph_release_acl_sec_ctx(&as_ctx);
	return err;
}
9482817b000SSage Weil 
/*
 * Create a file by delegating to ceph_mknod() with a device number of
 * zero.  The excl argument is not consulted.
 */
static int ceph_create(struct mnt_idmap *idmap, struct inode *dir,
		       struct dentry *dentry, umode_t mode, bool excl)
{
	const dev_t no_rdev = 0;

	return ceph_mknod(idmap, dir, dentry, mode, no_rdev);
}
9542817b000SSage Weil 
95579f2f6adSJeff Layton #if IS_ENABLED(CONFIG_FS_ENCRYPTION)
prep_encrypted_symlink_target(struct ceph_mds_request * req,const char * dest)95679f2f6adSJeff Layton static int prep_encrypted_symlink_target(struct ceph_mds_request *req,
95779f2f6adSJeff Layton 					 const char *dest)
95879f2f6adSJeff Layton {
95979f2f6adSJeff Layton 	int err;
96079f2f6adSJeff Layton 	int len = strlen(dest);
96179f2f6adSJeff Layton 	struct fscrypt_str osd_link = FSTR_INIT(NULL, 0);
96279f2f6adSJeff Layton 
96379f2f6adSJeff Layton 	err = fscrypt_prepare_symlink(req->r_parent, dest, len, PATH_MAX,
96479f2f6adSJeff Layton 				      &osd_link);
96579f2f6adSJeff Layton 	if (err)
96679f2f6adSJeff Layton 		goto out;
96779f2f6adSJeff Layton 
96879f2f6adSJeff Layton 	err = fscrypt_encrypt_symlink(req->r_new_inode, dest, len, &osd_link);
96979f2f6adSJeff Layton 	if (err)
97079f2f6adSJeff Layton 		goto out;
97179f2f6adSJeff Layton 
97279f2f6adSJeff Layton 	req->r_path2 = kmalloc(CEPH_BASE64_CHARS(osd_link.len) + 1, GFP_KERNEL);
97379f2f6adSJeff Layton 	if (!req->r_path2) {
97479f2f6adSJeff Layton 		err = -ENOMEM;
97579f2f6adSJeff Layton 		goto out;
97679f2f6adSJeff Layton 	}
97779f2f6adSJeff Layton 
97879f2f6adSJeff Layton 	len = ceph_base64_encode(osd_link.name, osd_link.len, req->r_path2);
97979f2f6adSJeff Layton 	req->r_path2[len] = '\0';
98079f2f6adSJeff Layton out:
98179f2f6adSJeff Layton 	fscrypt_fname_free_buffer(&osd_link);
98279f2f6adSJeff Layton 	return err;
98379f2f6adSJeff Layton }
98479f2f6adSJeff Layton #else
prep_encrypted_symlink_target(struct ceph_mds_request * req,const char * dest)98579f2f6adSJeff Layton static int prep_encrypted_symlink_target(struct ceph_mds_request *req,
98679f2f6adSJeff Layton 					 const char *dest)
98779f2f6adSJeff Layton {
98879f2f6adSJeff Layton 	return -EOPNOTSUPP;
98979f2f6adSJeff Layton }
99079f2f6adSJeff Layton #endif
99179f2f6adSJeff Layton 
ceph_symlink(struct mnt_idmap * idmap,struct inode * dir,struct dentry * dentry,const char * dest)9927a77db95SChristian Brauner static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir,
993549c7297SChristian Brauner 			struct dentry *dentry, const char *dest)
9942817b000SSage Weil {
9952678da88SXiubo Li 	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
9962817b000SSage Weil 	struct ceph_mds_request *req;
997ac6713ccSYan, Zheng 	struct ceph_acl_sec_ctx as_ctx = {};
998ec9595c0SJeff Layton 	umode_t mode = S_IFLNK | 0777;
9992817b000SSage Weil 	int err;
10002817b000SSage Weil 
10012817b000SSage Weil 	if (ceph_snap(dir) != CEPH_NOSNAP)
10022817b000SSage Weil 		return -EROFS;
10032817b000SSage Weil 
10044868e537SXiubo Li 	err = ceph_wait_on_conflict_unlink(dentry);
10054868e537SXiubo Li 	if (err)
10064868e537SXiubo Li 		return err;
10074868e537SXiubo Li 
100867fcd151SChengguang Xu 	if (ceph_quota_is_max_files_exceeded(dir)) {
100967fcd151SChengguang Xu 		err = -EDQUOT;
101067fcd151SChengguang Xu 		goto out;
101167fcd151SChengguang Xu 	}
1012b7a29217SLuis Henriques 
10132817b000SSage Weil 	dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
10142817b000SSage Weil 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
10152817b000SSage Weil 	if (IS_ERR(req)) {
1016b1ee94aaSYan, Zheng 		err = PTR_ERR(req);
1017b1ee94aaSYan, Zheng 		goto out;
10182817b000SSage Weil 	}
1019ec9595c0SJeff Layton 
1020ec9595c0SJeff Layton 	req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
1021ec9595c0SJeff Layton 	if (IS_ERR(req->r_new_inode)) {
1022ec9595c0SJeff Layton 		err = PTR_ERR(req->r_new_inode);
1023ec9595c0SJeff Layton 		req->r_new_inode = NULL;
1024ec9595c0SJeff Layton 		goto out_req;
1025ec9595c0SJeff Layton 	}
1026ec9595c0SJeff Layton 
102779f2f6adSJeff Layton 	req->r_parent = dir;
102879f2f6adSJeff Layton 	ihold(dir);
102979f2f6adSJeff Layton 
103079f2f6adSJeff Layton 	if (IS_ENCRYPTED(req->r_new_inode)) {
103179f2f6adSJeff Layton 		err = prep_encrypted_symlink_target(req, dest);
103279f2f6adSJeff Layton 		if (err)
103379f2f6adSJeff Layton 			goto out_req;
103479f2f6adSJeff Layton 	} else {
1035687265e5SYan, Zheng 		req->r_path2 = kstrdup(dest, GFP_KERNEL);
1036a149bb9aSSanidhya Kashyap 		if (!req->r_path2) {
1037a149bb9aSSanidhya Kashyap 			err = -ENOMEM;
1038ec9595c0SJeff Layton 			goto out_req;
1039a149bb9aSSanidhya Kashyap 		}
104079f2f6adSJeff Layton 	}
10414c183472SJeff Layton 
10423dd69aabSJeff Layton 	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
10432817b000SSage Weil 	req->r_dentry = dget(dentry);
10442817b000SSage Weil 	req->r_num_caps = 2;
1045d9d00f71SXiubo Li 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
1046d9d00f71SXiubo Li 			     CEPH_CAP_XATTR_EXCL;
10472817b000SSage Weil 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
1048ec9595c0SJeff Layton 
1049ec9595c0SJeff Layton 	ceph_as_ctx_to_req(req, &as_ctx);
1050ec9595c0SJeff Layton 
10512817b000SSage Weil 	err = ceph_mdsc_do_request(mdsc, dir, req);
10522817b000SSage Weil 	if (!err && !req->r_reply_info.head->is_dentry)
10532817b000SSage Weil 		err = ceph_handle_notrace_create(dir, dentry);
1054ec9595c0SJeff Layton out_req:
10552817b000SSage Weil 	ceph_mdsc_put_request(req);
1056b1ee94aaSYan, Zheng out:
1057b1ee94aaSYan, Zheng 	if (err)
10582817b000SSage Weil 		d_drop(dentry);
1059ac6713ccSYan, Zheng 	ceph_release_acl_sec_ctx(&as_ctx);
10602817b000SSage Weil 	return err;
10612817b000SSage Weil }
10622817b000SSage Weil 
/*
 * Create a directory, or a snapshot when dir is the snapdir.
 *
 * - dir is CEPH_SNAPDIR: issue MKSNAP; refused with -ENOKEY when dir is
 *   encrypted and its key is not available.
 * - dir is CEPH_NOSNAP: issue MKDIR; refused with -EDQUOT when the
 *   max-files quota is exceeded.
 * - anything else: -EROFS.
 *
 * When the reply carries neither a target nor a dentry trace, a
 * follow-up lookup links the dentry.  On failure after the conflict wait
 * the dentry is dropped; on success ACLs are applied to the new inode.
 */
static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
		      struct dentry *dentry, umode_t mode)
{
	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
	struct ceph_acl_sec_ctx as_ctx = {};
	struct ceph_mds_request *req;
	int op;
	int err;

	/* let any pending async unlink of the same name finish first */
	err = ceph_wait_on_conflict_unlink(dentry);
	if (err)
		return err;

	if (ceph_snap(dir) == CEPH_SNAPDIR) {
		/* mkdir .snap/foo is a MKSNAP */
		op = CEPH_MDS_OP_MKSNAP;
		dout("mksnap dir %p snap '%pd' dn %p\n", dir,
		     dentry, dentry);
		if (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir)) {
			err = -ENOKEY;
			goto finish;
		}
	} else if (ceph_snap(dir) == CEPH_NOSNAP) {
		dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
		op = CEPH_MDS_OP_MKDIR;
		if (ceph_quota_is_max_files_exceeded(dir)) {
			err = -EDQUOT;
			goto finish;
		}
	} else {
		err = -EROFS;
		goto finish;
	}

	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto finish;
	}

	mode |= S_IFDIR;
	req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
	if (IS_ERR(req->r_new_inode)) {
		err = PTR_ERR(req->r_new_inode);
		/* never leave an ERR_PTR in the request */
		req->r_new_inode = NULL;
		goto put_req;
	}

	req->r_dentry = dget(dentry);
	req->r_num_caps = 2;

	/* the request holds its own reference on the parent */
	req->r_parent = dir;
	ihold(dir);
	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);

	req->r_args.mkdir.mode = cpu_to_le32(mode);
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
			     CEPH_CAP_XATTR_EXCL;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;

	ceph_as_ctx_to_req(req, &as_ctx);

	err = ceph_mdsc_do_request(mdsc, dir, req);
	if (!err &&
	    !req->r_reply_info.head->is_target &&
	    !req->r_reply_info.head->is_dentry)
		err = ceph_handle_notrace_create(dir, dentry);

put_req:
	ceph_mdsc_put_request(req);
finish:
	if (err)
		d_drop(dentry);
	else
		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
	ceph_release_acl_sec_ctx(&as_ctx);
	return err;
}
11422817b000SSage Weil 
ceph_link(struct dentry * old_dentry,struct inode * dir,struct dentry * dentry)11432817b000SSage Weil static int ceph_link(struct dentry *old_dentry, struct inode *dir,
11442817b000SSage Weil 		     struct dentry *dentry)
11452817b000SSage Weil {
11462678da88SXiubo Li 	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
11472817b000SSage Weil 	struct ceph_mds_request *req;
11482817b000SSage Weil 	int err;
11492817b000SSage Weil 
1150a5ffd7b6SXiubo Li 	if (dentry->d_flags & DCACHE_DISCONNECTED)
1151a5ffd7b6SXiubo Li 		return -EINVAL;
1152a5ffd7b6SXiubo Li 
11534868e537SXiubo Li 	err = ceph_wait_on_conflict_unlink(dentry);
11544868e537SXiubo Li 	if (err)
11554868e537SXiubo Li 		return err;
11564868e537SXiubo Li 
11572817b000SSage Weil 	if (ceph_snap(dir) != CEPH_NOSNAP)
11582817b000SSage Weil 		return -EROFS;
11592817b000SSage Weil 
116094af0470SJeff Layton 	err = fscrypt_prepare_link(old_dentry, dir, dentry);
116194af0470SJeff Layton 	if (err)
116294af0470SJeff Layton 		return err;
116394af0470SJeff Layton 
1164a5ffd7b6SXiubo Li 	dout("link in dir %p %llx.%llx old_dentry %p:'%pd' dentry %p:'%pd'\n",
1165a5ffd7b6SXiubo Li 	     dir, ceph_vinop(dir), old_dentry, old_dentry, dentry, dentry);
11662817b000SSage Weil 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
11672817b000SSage Weil 	if (IS_ERR(req)) {
11682817b000SSage Weil 		d_drop(dentry);
11692817b000SSage Weil 		return PTR_ERR(req);
11702817b000SSage Weil 	}
11712817b000SSage Weil 	req->r_dentry = dget(dentry);
11722817b000SSage Weil 	req->r_num_caps = 2;
11734b58c9b1SSage Weil 	req->r_old_dentry = dget(old_dentry);
1174a5ffd7b6SXiubo Li 	/*
1175a5ffd7b6SXiubo Li 	 * The old_dentry maybe a DCACHE_DISCONNECTED dentry, then we
1176a5ffd7b6SXiubo Li 	 * will just pass the ino# to MDSs.
1177a5ffd7b6SXiubo Li 	 */
1178a5ffd7b6SXiubo Li 	if (old_dentry->d_flags & DCACHE_DISCONNECTED)
1179a5ffd7b6SXiubo Li 		req->r_ino2 = ceph_vino(d_inode(old_dentry));
11803dd69aabSJeff Layton 	req->r_parent = dir;
11814c183472SJeff Layton 	ihold(dir);
11823dd69aabSJeff Layton 	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
1183d9d00f71SXiubo Li 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
11842817b000SSage Weil 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
1185ad88f23fSYan, Zheng 	/* release LINK_SHARED on source inode (mds will lock it) */
1186d19a0b54SYan, Zheng 	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
11872817b000SSage Weil 	err = ceph_mdsc_do_request(mdsc, dir, req);
118870b666c3SSage Weil 	if (err) {
11892817b000SSage Weil 		d_drop(dentry);
119070b666c3SSage Weil 	} else if (!req->r_reply_info.head->is_dentry) {
11912b0143b5SDavid Howells 		ihold(d_inode(old_dentry));
11922b0143b5SDavid Howells 		d_instantiate(dentry, d_inode(old_dentry));
119370b666c3SSage Weil 	}
11942817b000SSage Weil 	ceph_mdsc_put_request(req);
11952817b000SSage Weil 	return err;
11962817b000SSage Weil }
11972817b000SSage Weil 
ceph_async_unlink_cb(struct ceph_mds_client * mdsc,struct ceph_mds_request * req)11982ccb4546SJeff Layton static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
11992ccb4546SJeff Layton 				 struct ceph_mds_request *req)
12002ccb4546SJeff Layton {
12014868e537SXiubo Li 	struct dentry *dentry = req->r_dentry;
1202*985b9ee8SXiubo Li 	struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb);
12034868e537SXiubo Li 	struct ceph_dentry_info *di = ceph_dentry(dentry);
12042ccb4546SJeff Layton 	int result = req->r_err ? req->r_err :
12052ccb4546SJeff Layton 			le32_to_cpu(req->r_reply_info.head->result);
12062ccb4546SJeff Layton 
12074868e537SXiubo Li 	if (!test_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags))
12084868e537SXiubo Li 		pr_warn("%s dentry %p:%pd async unlink bit is not set\n",
12094868e537SXiubo Li 			__func__, dentry, dentry);
12104868e537SXiubo Li 
12114868e537SXiubo Li 	spin_lock(&fsc->async_unlink_conflict_lock);
12124868e537SXiubo Li 	hash_del_rcu(&di->hnode);
12134868e537SXiubo Li 	spin_unlock(&fsc->async_unlink_conflict_lock);
12144868e537SXiubo Li 
12154868e537SXiubo Li 	spin_lock(&dentry->d_lock);
12164868e537SXiubo Li 	di->flags &= ~CEPH_DENTRY_ASYNC_UNLINK;
12174868e537SXiubo Li 	wake_up_bit(&di->flags, CEPH_DENTRY_ASYNC_UNLINK_BIT);
12184868e537SXiubo Li 	spin_unlock(&dentry->d_lock);
12194868e537SXiubo Li 
12204868e537SXiubo Li 	synchronize_rcu();
12214868e537SXiubo Li 
12222ccb4546SJeff Layton 	if (result == -EJUKEBOX)
12232ccb4546SJeff Layton 		goto out;
12242ccb4546SJeff Layton 
12252ccb4546SJeff Layton 	/* If op failed, mark everyone involved for errors */
12262ccb4546SJeff Layton 	if (result) {
12272a575f13SJeff Layton 		int pathlen = 0;
12282a575f13SJeff Layton 		u64 base = 0;
12292e2023e9SXiubo Li 		char *path = ceph_mdsc_build_path(mdsc, dentry, &pathlen,
12302ccb4546SJeff Layton 						  &base, 0);
12312ccb4546SJeff Layton 
12322ccb4546SJeff Layton 		/* mark error on parent + clear complete */
12332ccb4546SJeff Layton 		mapping_set_error(req->r_parent->i_mapping, result);
12342ccb4546SJeff Layton 		ceph_dir_clear_complete(req->r_parent);
12352ccb4546SJeff Layton 
12362ccb4546SJeff Layton 		/* drop the dentry -- we don't know its status */
12374868e537SXiubo Li 		if (!d_unhashed(dentry))
12384868e537SXiubo Li 			d_drop(dentry);
12392ccb4546SJeff Layton 
12402ccb4546SJeff Layton 		/* mark inode itself for an error (since metadata is bogus) */
12412ccb4546SJeff Layton 		mapping_set_error(req->r_old_inode->i_mapping, result);
12422ccb4546SJeff Layton 
12434868e537SXiubo Li 		pr_warn("async unlink failure path=(%llx)%s result=%d!\n",
12442ccb4546SJeff Layton 			base, IS_ERR(path) ? "<<bad>>" : path, result);
12452ccb4546SJeff Layton 		ceph_mdsc_free_path(path, pathlen);
12462ccb4546SJeff Layton 	}
12472ccb4546SJeff Layton out:
12482ccb4546SJeff Layton 	iput(req->r_old_inode);
12492ccb4546SJeff Layton 	ceph_mdsc_release_dir_caps(req);
12502ccb4546SJeff Layton }
12512ccb4546SJeff Layton 
get_caps_for_async_unlink(struct inode * dir,struct dentry * dentry)12522ccb4546SJeff Layton static int get_caps_for_async_unlink(struct inode *dir, struct dentry *dentry)
12532ccb4546SJeff Layton {
12542ccb4546SJeff Layton 	struct ceph_inode_info *ci = ceph_inode(dir);
12552ccb4546SJeff Layton 	struct ceph_dentry_info *di;
12562ccb4546SJeff Layton 	int got = 0, want = CEPH_CAP_FILE_EXCL | CEPH_CAP_DIR_UNLINK;
12572ccb4546SJeff Layton 
12582ccb4546SJeff Layton 	spin_lock(&ci->i_ceph_lock);
12592ccb4546SJeff Layton 	if ((__ceph_caps_issued(ci, NULL) & want) == want) {
12602ccb4546SJeff Layton 		ceph_take_cap_refs(ci, want, false);
12612ccb4546SJeff Layton 		got = want;
12622ccb4546SJeff Layton 	}
12632ccb4546SJeff Layton 	spin_unlock(&ci->i_ceph_lock);
12642ccb4546SJeff Layton 
12652ccb4546SJeff Layton 	/* If we didn't get anything, return 0 */
12662ccb4546SJeff Layton 	if (!got)
12672ccb4546SJeff Layton 		return 0;
12682ccb4546SJeff Layton 
12692ccb4546SJeff Layton         spin_lock(&dentry->d_lock);
12702ccb4546SJeff Layton         di = ceph_dentry(dentry);
12712ccb4546SJeff Layton 	/*
12722ccb4546SJeff Layton 	 * - We are holding Fx, which implies Fs caps.
12732ccb4546SJeff Layton 	 * - Only support async unlink for primary linkage
12742ccb4546SJeff Layton 	 */
12752ccb4546SJeff Layton 	if (atomic_read(&ci->i_shared_gen) != di->lease_shared_gen ||
12762ccb4546SJeff Layton 	    !(di->flags & CEPH_DENTRY_PRIMARY_LINK))
12772ccb4546SJeff Layton 		want = 0;
12782ccb4546SJeff Layton         spin_unlock(&dentry->d_lock);
12792ccb4546SJeff Layton 
12802ccb4546SJeff Layton 	/* Do we still want what we've got? */
12812ccb4546SJeff Layton 	if (want == got)
12822ccb4546SJeff Layton 		return got;
12832ccb4546SJeff Layton 
12842ccb4546SJeff Layton 	ceph_put_cap_refs(ci, got);
12852ccb4546SJeff Layton 	return 0;
12862ccb4546SJeff Layton }
12872ccb4546SJeff Layton 
12882817b000SSage Weil /*
12892817b000SSage Weil  * rmdir and unlink are differ only by the metadata op code
12902817b000SSage Weil  */
ceph_unlink(struct inode * dir,struct dentry * dentry)12912817b000SSage Weil static int ceph_unlink(struct inode *dir, struct dentry *dentry)
12922817b000SSage Weil {
1293*985b9ee8SXiubo Li 	struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb);
12943d14c5d2SYehuda Sadeh 	struct ceph_mds_client *mdsc = fsc->mdsc;
12952b0143b5SDavid Howells 	struct inode *inode = d_inode(dentry);
12962817b000SSage Weil 	struct ceph_mds_request *req;
12972ccb4546SJeff Layton 	bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
12982817b000SSage Weil 	int err = -EROFS;
12992817b000SSage Weil 	int op;
13002817b000SSage Weil 
13012817b000SSage Weil 	if (ceph_snap(dir) == CEPH_SNAPDIR) {
13022817b000SSage Weil 		/* rmdir .snap/foo is RMSNAP */
1303a455589fSAl Viro 		dout("rmsnap dir %p '%pd' dn %p\n", dir, dentry, dentry);
13042817b000SSage Weil 		op = CEPH_MDS_OP_RMSNAP;
13052817b000SSage Weil 	} else if (ceph_snap(dir) == CEPH_NOSNAP) {
13062817b000SSage Weil 		dout("unlink/rmdir dir %p dn %p inode %p\n",
13072817b000SSage Weil 		     dir, dentry, inode);
1308e36cb0b8SDavid Howells 		op = d_is_dir(dentry) ?
13092817b000SSage Weil 			CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
13102817b000SSage Weil 	} else
13112817b000SSage Weil 		goto out;
13122ccb4546SJeff Layton retry:
13132817b000SSage Weil 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
13142817b000SSage Weil 	if (IS_ERR(req)) {
13152817b000SSage Weil 		err = PTR_ERR(req);
13162817b000SSage Weil 		goto out;
13172817b000SSage Weil 	}
13182817b000SSage Weil 	req->r_dentry = dget(dentry);
13192817b000SSage Weil 	req->r_num_caps = 2;
13203dd69aabSJeff Layton 	req->r_parent = dir;
13214c183472SJeff Layton 	ihold(dir);
1322d9d00f71SXiubo Li 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
13232817b000SSage Weil 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
13246ef0bc6dSZhi Zhang 	req->r_inode_drop = ceph_drop_caps_for_unlink(inode);
13252ccb4546SJeff Layton 
13262ccb4546SJeff Layton 	if (try_async && op == CEPH_MDS_OP_UNLINK &&
13272ccb4546SJeff Layton 	    (req->r_dir_caps = get_caps_for_async_unlink(dir, dentry))) {
13284868e537SXiubo Li 		struct ceph_dentry_info *di = ceph_dentry(dentry);
13294868e537SXiubo Li 
1330ebce3eb2SJeff Layton 		dout("async unlink on %llu/%.*s caps=%s", ceph_ino(dir),
13312ccb4546SJeff Layton 		     dentry->d_name.len, dentry->d_name.name,
13322ccb4546SJeff Layton 		     ceph_cap_string(req->r_dir_caps));
13332ccb4546SJeff Layton 		set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags);
13342ccb4546SJeff Layton 		req->r_callback = ceph_async_unlink_cb;
13352ccb4546SJeff Layton 		req->r_old_inode = d_inode(dentry);
13362ccb4546SJeff Layton 		ihold(req->r_old_inode);
13374868e537SXiubo Li 
13384868e537SXiubo Li 		spin_lock(&dentry->d_lock);
13394868e537SXiubo Li 		di->flags |= CEPH_DENTRY_ASYNC_UNLINK;
13404868e537SXiubo Li 		spin_unlock(&dentry->d_lock);
13414868e537SXiubo Li 
13424868e537SXiubo Li 		spin_lock(&fsc->async_unlink_conflict_lock);
13434868e537SXiubo Li 		hash_add_rcu(fsc->async_unlink_conflict, &di->hnode,
13444868e537SXiubo Li 			     dentry->d_name.hash);
13454868e537SXiubo Li 		spin_unlock(&fsc->async_unlink_conflict_lock);
13464868e537SXiubo Li 
13472ccb4546SJeff Layton 		err = ceph_mdsc_submit_request(mdsc, dir, req);
13482ccb4546SJeff Layton 		if (!err) {
13492ccb4546SJeff Layton 			/*
13502ccb4546SJeff Layton 			 * We have enough caps, so we assume that the unlink
13512ccb4546SJeff Layton 			 * will succeed. Fix up the target inode and dcache.
13522ccb4546SJeff Layton 			 */
13532ccb4546SJeff Layton 			drop_nlink(inode);
13542ccb4546SJeff Layton 			d_delete(dentry);
13554868e537SXiubo Li 		} else {
13564868e537SXiubo Li 			spin_lock(&fsc->async_unlink_conflict_lock);
13574868e537SXiubo Li 			hash_del_rcu(&di->hnode);
13584868e537SXiubo Li 			spin_unlock(&fsc->async_unlink_conflict_lock);
13594868e537SXiubo Li 
13604868e537SXiubo Li 			spin_lock(&dentry->d_lock);
13614868e537SXiubo Li 			di->flags &= ~CEPH_DENTRY_ASYNC_UNLINK;
13624868e537SXiubo Li 			spin_unlock(&dentry->d_lock);
13634868e537SXiubo Li 
13644868e537SXiubo Li 			if (err == -EJUKEBOX) {
13652ccb4546SJeff Layton 				try_async = false;
13662ccb4546SJeff Layton 				ceph_mdsc_put_request(req);
13672ccb4546SJeff Layton 				goto retry;
13682ccb4546SJeff Layton 			}
13694868e537SXiubo Li 		}
13702ccb4546SJeff Layton 	} else {
13712ccb4546SJeff Layton 		set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
13722817b000SSage Weil 		err = ceph_mdsc_do_request(mdsc, dir, req);
13732817b000SSage Weil 		if (!err && !req->r_reply_info.head->is_dentry)
13742817b000SSage Weil 			d_delete(dentry);
13752ccb4546SJeff Layton 	}
13762ccb4546SJeff Layton 
13772817b000SSage Weil 	ceph_mdsc_put_request(req);
13782817b000SSage Weil out:
13792817b000SSage Weil 	return err;
13802817b000SSage Weil }
13812817b000SSage Weil 
ceph_rename(struct mnt_idmap * idmap,struct inode * old_dir,struct dentry * old_dentry,struct inode * new_dir,struct dentry * new_dentry,unsigned int flags)1382e18275aeSChristian Brauner static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir,
1383549c7297SChristian Brauner 		       struct dentry *old_dentry, struct inode *new_dir,
1384549c7297SChristian Brauner 		       struct dentry *new_dentry, unsigned int flags)
13852817b000SSage Weil {
13862678da88SXiubo Li 	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old_dir->i_sb);
13872817b000SSage Weil 	struct ceph_mds_request *req;
13880ea611a3SYan, Zheng 	int op = CEPH_MDS_OP_RENAME;
13892817b000SSage Weil 	int err;
13902817b000SSage Weil 
13911cd66c93SMiklos Szeredi 	if (flags)
13921cd66c93SMiklos Szeredi 		return -EINVAL;
13931cd66c93SMiklos Szeredi 
13942817b000SSage Weil 	if (ceph_snap(old_dir) != ceph_snap(new_dir))
13952817b000SSage Weil 		return -EXDEV;
13960ea611a3SYan, Zheng 	if (ceph_snap(old_dir) != CEPH_NOSNAP) {
13970ea611a3SYan, Zheng 		if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR)
13980ea611a3SYan, Zheng 			op = CEPH_MDS_OP_RENAMESNAP;
13990ea611a3SYan, Zheng 		else
14002817b000SSage Weil 			return -EROFS;
14010ea611a3SYan, Zheng 	}
14026646ea1cSLuis Henriques 	/* don't allow cross-quota renames */
14036646ea1cSLuis Henriques 	if ((old_dir != new_dir) &&
14046646ea1cSLuis Henriques 	    (!ceph_quota_is_same_realm(old_dir, new_dir)))
14056646ea1cSLuis Henriques 		return -EXDEV;
1406cafe21a4SLuis Henriques 
14074868e537SXiubo Li 	err = ceph_wait_on_conflict_unlink(new_dentry);
14084868e537SXiubo Li 	if (err)
14094868e537SXiubo Li 		return err;
14104868e537SXiubo Li 
141194af0470SJeff Layton 	err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry,
141294af0470SJeff Layton 				     flags);
141394af0470SJeff Layton 	if (err)
141494af0470SJeff Layton 		return err;
141594af0470SJeff Layton 
14162817b000SSage Weil 	dout("rename dir %p dentry %p to dir %p dentry %p\n",
14172817b000SSage Weil 	     old_dir, old_dentry, new_dir, new_dentry);
14180ea611a3SYan, Zheng 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
14192817b000SSage Weil 	if (IS_ERR(req))
14202817b000SSage Weil 		return PTR_ERR(req);
1421180061a5SSage Weil 	ihold(old_dir);
14222817b000SSage Weil 	req->r_dentry = dget(new_dentry);
14232817b000SSage Weil 	req->r_num_caps = 2;
14242817b000SSage Weil 	req->r_old_dentry = dget(old_dentry);
1425180061a5SSage Weil 	req->r_old_dentry_dir = old_dir;
14263dd69aabSJeff Layton 	req->r_parent = new_dir;
14274c183472SJeff Layton 	ihold(new_dir);
14283dd69aabSJeff Layton 	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
1429d9d00f71SXiubo Li 	req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
14302817b000SSage Weil 	req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
1431d9d00f71SXiubo Li 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
14322817b000SSage Weil 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
14332817b000SSage Weil 	/* release LINK_RDCACHE on source inode (mds will lock it) */
1434d19a0b54SYan, Zheng 	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
14356ef0bc6dSZhi Zhang 	if (d_really_is_positive(new_dentry)) {
14366ef0bc6dSZhi Zhang 		req->r_inode_drop =
14376ef0bc6dSZhi Zhang 			ceph_drop_caps_for_unlink(d_inode(new_dentry));
14386ef0bc6dSZhi Zhang 	}
14392817b000SSage Weil 	err = ceph_mdsc_do_request(mdsc, old_dir, req);
14402817b000SSage Weil 	if (!err && !req->r_reply_info.head->is_dentry) {
14412817b000SSage Weil 		/*
14422817b000SSage Weil 		 * Normally d_move() is done by fill_trace (called by
14432817b000SSage Weil 		 * do_request, above).  If there is no trace, we need
14442817b000SSage Weil 		 * to do it here.
14452817b000SSage Weil 		 */
14462817b000SSage Weil 		d_move(old_dentry, new_dentry);
14472817b000SSage Weil 	}
14482817b000SSage Weil 	ceph_mdsc_put_request(req);
14492817b000SSage Weil 	return err;
14502817b000SSage Weil }
14512817b000SSage Weil 
145281a6cf2dSSage Weil /*
145337c4efc1SYan, Zheng  * Move dentry to tail of mdsc->dentry_leases list when lease is updated.
145437c4efc1SYan, Zheng  * Leases at front of the list will expire first. (Assume all leases have
145537c4efc1SYan, Zheng  * similar duration)
145637c4efc1SYan, Zheng  *
145737c4efc1SYan, Zheng  * Called under dentry->d_lock.
145837c4efc1SYan, Zheng  */
__ceph_dentry_lease_touch(struct ceph_dentry_info * di)145937c4efc1SYan, Zheng void __ceph_dentry_lease_touch(struct ceph_dentry_info *di)
146037c4efc1SYan, Zheng {
146137c4efc1SYan, Zheng 	struct dentry *dn = di->dentry;
146237c4efc1SYan, Zheng 	struct ceph_mds_client *mdsc;
146337c4efc1SYan, Zheng 
146437c4efc1SYan, Zheng 	dout("dentry_lease_touch %p %p '%pd'\n", di, dn, dn);
146537c4efc1SYan, Zheng 
146637c4efc1SYan, Zheng 	di->flags |= CEPH_DENTRY_LEASE_LIST;
146737c4efc1SYan, Zheng 	if (di->flags & CEPH_DENTRY_SHRINK_LIST) {
146837c4efc1SYan, Zheng 		di->flags |= CEPH_DENTRY_REFERENCED;
146937c4efc1SYan, Zheng 		return;
147037c4efc1SYan, Zheng 	}
147137c4efc1SYan, Zheng 
1472*985b9ee8SXiubo Li 	mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc;
147337c4efc1SYan, Zheng 	spin_lock(&mdsc->dentry_list_lock);
147437c4efc1SYan, Zheng 	list_move_tail(&di->lease_list, &mdsc->dentry_leases);
147537c4efc1SYan, Zheng 	spin_unlock(&mdsc->dentry_list_lock);
147637c4efc1SYan, Zheng }
147737c4efc1SYan, Zheng 
__dentry_dir_lease_touch(struct ceph_mds_client * mdsc,struct ceph_dentry_info * di)147837c4efc1SYan, Zheng static void __dentry_dir_lease_touch(struct ceph_mds_client* mdsc,
147937c4efc1SYan, Zheng 				     struct ceph_dentry_info *di)
148037c4efc1SYan, Zheng {
148137c4efc1SYan, Zheng 	di->flags &= ~(CEPH_DENTRY_LEASE_LIST | CEPH_DENTRY_REFERENCED);
148237c4efc1SYan, Zheng 	di->lease_gen = 0;
148337c4efc1SYan, Zheng 	di->time = jiffies;
148437c4efc1SYan, Zheng 	list_move_tail(&di->lease_list, &mdsc->dentry_dir_leases);
148537c4efc1SYan, Zheng }
148637c4efc1SYan, Zheng 
148737c4efc1SYan, Zheng /*
148837c4efc1SYan, Zheng  * When dir lease is used, add dentry to tail of mdsc->dentry_dir_leases
148937c4efc1SYan, Zheng  * list if it's not in the list, otherwise set 'referenced' flag.
149037c4efc1SYan, Zheng  *
149137c4efc1SYan, Zheng  * Called under dentry->d_lock.
149237c4efc1SYan, Zheng  */
__ceph_dentry_dir_lease_touch(struct ceph_dentry_info * di)149337c4efc1SYan, Zheng void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di)
149437c4efc1SYan, Zheng {
149537c4efc1SYan, Zheng 	struct dentry *dn = di->dentry;
149637c4efc1SYan, Zheng 	struct ceph_mds_client *mdsc;
149737c4efc1SYan, Zheng 
14980eb30853SXiubo Li 	dout("dentry_dir_lease_touch %p %p '%pd' (offset 0x%llx)\n",
149937c4efc1SYan, Zheng 	     di, dn, dn, di->offset);
150037c4efc1SYan, Zheng 
150137c4efc1SYan, Zheng 	if (!list_empty(&di->lease_list)) {
150237c4efc1SYan, Zheng 		if (di->flags & CEPH_DENTRY_LEASE_LIST) {
150337c4efc1SYan, Zheng 			/* don't remove dentry from dentry lease list
150437c4efc1SYan, Zheng 			 * if its lease is valid */
150537c4efc1SYan, Zheng 			if (__dentry_lease_is_valid(di))
150637c4efc1SYan, Zheng 				return;
150737c4efc1SYan, Zheng 		} else {
150837c4efc1SYan, Zheng 			di->flags |= CEPH_DENTRY_REFERENCED;
150937c4efc1SYan, Zheng 			return;
151037c4efc1SYan, Zheng 		}
151137c4efc1SYan, Zheng 	}
151237c4efc1SYan, Zheng 
151337c4efc1SYan, Zheng 	if (di->flags & CEPH_DENTRY_SHRINK_LIST) {
151437c4efc1SYan, Zheng 		di->flags |= CEPH_DENTRY_REFERENCED;
151537c4efc1SYan, Zheng 		di->flags &= ~CEPH_DENTRY_LEASE_LIST;
151637c4efc1SYan, Zheng 		return;
151737c4efc1SYan, Zheng 	}
151837c4efc1SYan, Zheng 
1519*985b9ee8SXiubo Li 	mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc;
152037c4efc1SYan, Zheng 	spin_lock(&mdsc->dentry_list_lock);
152137c4efc1SYan, Zheng 	__dentry_dir_lease_touch(mdsc, di),
152237c4efc1SYan, Zheng 	spin_unlock(&mdsc->dentry_list_lock);
152337c4efc1SYan, Zheng }
152437c4efc1SYan, Zheng 
__dentry_lease_unlist(struct ceph_dentry_info * di)152537c4efc1SYan, Zheng static void __dentry_lease_unlist(struct ceph_dentry_info *di)
152637c4efc1SYan, Zheng {
152737c4efc1SYan, Zheng 	struct ceph_mds_client *mdsc;
152837c4efc1SYan, Zheng 	if (di->flags & CEPH_DENTRY_SHRINK_LIST)
152937c4efc1SYan, Zheng 		return;
153037c4efc1SYan, Zheng 	if (list_empty(&di->lease_list))
153137c4efc1SYan, Zheng 		return;
153237c4efc1SYan, Zheng 
1533*985b9ee8SXiubo Li 	mdsc = ceph_sb_to_fs_client(di->dentry->d_sb)->mdsc;
153437c4efc1SYan, Zheng 	spin_lock(&mdsc->dentry_list_lock);
153537c4efc1SYan, Zheng 	list_del_init(&di->lease_list);
153637c4efc1SYan, Zheng 	spin_unlock(&mdsc->dentry_list_lock);
153737c4efc1SYan, Zheng }
153837c4efc1SYan, Zheng 
/*
 * Verdicts returned by the lease-walk check callbacks.  The non-zero
 * values are distinct bits; the walker tests them with '&'.
 */
enum {
	KEEP	= 0,	/* leave the entry where it is */
	DELETE	= 1,	/* lease is stale: unlist or dispose of the dentry */
	TOUCH	= 2,	/* move the entry to the tail of the dir lease list */
	STOP	= 4,	/* end the walk after this entry */
};
154537c4efc1SYan, Zheng 
/*
 * Parameters and running state for one pass of the lease list walker.
 */
struct ceph_lease_walk_control {
	/* walk the dir lease list rather than the dentry lease list */
	bool dir_lease;
	/* allow still-valid dir leases to be invalidated and deleted */
	bool expire_dir_lease;
	/* remaining scan budget; decremented for each entry visited */
	unsigned long nr_to_scan;
	/* jiffies a dir-lease entry is kept before it is considered */
	unsigned long dir_lease_ttl;
};
155237c4efc1SYan, Zheng 
155337c4efc1SYan, Zheng static unsigned long
__dentry_leases_walk(struct ceph_mds_client * mdsc,struct ceph_lease_walk_control * lwc,int (* check)(struct dentry *,void *))155437c4efc1SYan, Zheng __dentry_leases_walk(struct ceph_mds_client *mdsc,
155537c4efc1SYan, Zheng 		     struct ceph_lease_walk_control *lwc,
155637c4efc1SYan, Zheng 		     int (*check)(struct dentry*, void*))
155737c4efc1SYan, Zheng {
155837c4efc1SYan, Zheng 	struct ceph_dentry_info *di, *tmp;
155937c4efc1SYan, Zheng 	struct dentry *dentry, *last = NULL;
156037c4efc1SYan, Zheng 	struct list_head* list;
156137c4efc1SYan, Zheng         LIST_HEAD(dispose);
156237c4efc1SYan, Zheng 	unsigned long freed = 0;
156337c4efc1SYan, Zheng 	int ret = 0;
156437c4efc1SYan, Zheng 
156537c4efc1SYan, Zheng 	list = lwc->dir_lease ? &mdsc->dentry_dir_leases : &mdsc->dentry_leases;
156637c4efc1SYan, Zheng 	spin_lock(&mdsc->dentry_list_lock);
156737c4efc1SYan, Zheng 	list_for_each_entry_safe(di, tmp, list, lease_list) {
156837c4efc1SYan, Zheng 		if (!lwc->nr_to_scan)
156937c4efc1SYan, Zheng 			break;
157037c4efc1SYan, Zheng 		--lwc->nr_to_scan;
157137c4efc1SYan, Zheng 
157237c4efc1SYan, Zheng 		dentry = di->dentry;
157337c4efc1SYan, Zheng 		if (last == dentry)
157437c4efc1SYan, Zheng 			break;
157537c4efc1SYan, Zheng 
157637c4efc1SYan, Zheng 		if (!spin_trylock(&dentry->d_lock))
157737c4efc1SYan, Zheng 			continue;
157837c4efc1SYan, Zheng 
1579516162b9SAl Viro 		if (__lockref_is_dead(&dentry->d_lockref)) {
158037c4efc1SYan, Zheng 			list_del_init(&di->lease_list);
158137c4efc1SYan, Zheng 			goto next;
158237c4efc1SYan, Zheng 		}
158337c4efc1SYan, Zheng 
158437c4efc1SYan, Zheng 		ret = check(dentry, lwc);
158537c4efc1SYan, Zheng 		if (ret & TOUCH) {
158637c4efc1SYan, Zheng 			/* move it into tail of dir lease list */
158737c4efc1SYan, Zheng 			__dentry_dir_lease_touch(mdsc, di);
158837c4efc1SYan, Zheng 			if (!last)
158937c4efc1SYan, Zheng 				last = dentry;
159037c4efc1SYan, Zheng 		}
159137c4efc1SYan, Zheng 		if (ret & DELETE) {
159237c4efc1SYan, Zheng 			/* stale lease */
159337c4efc1SYan, Zheng 			di->flags &= ~CEPH_DENTRY_REFERENCED;
159437c4efc1SYan, Zheng 			if (dentry->d_lockref.count > 0) {
159537c4efc1SYan, Zheng 				/* update_dentry_lease() will re-add
159637c4efc1SYan, Zheng 				 * it to lease list, or
159737c4efc1SYan, Zheng 				 * ceph_d_delete() will return 1 when
159837c4efc1SYan, Zheng 				 * last reference is dropped */
159937c4efc1SYan, Zheng 				list_del_init(&di->lease_list);
160037c4efc1SYan, Zheng 			} else {
160137c4efc1SYan, Zheng 				di->flags |= CEPH_DENTRY_SHRINK_LIST;
160237c4efc1SYan, Zheng 				list_move_tail(&di->lease_list, &dispose);
160337c4efc1SYan, Zheng 				dget_dlock(dentry);
160437c4efc1SYan, Zheng 			}
160537c4efc1SYan, Zheng 		}
160637c4efc1SYan, Zheng next:
160737c4efc1SYan, Zheng 		spin_unlock(&dentry->d_lock);
160837c4efc1SYan, Zheng 		if (ret & STOP)
160937c4efc1SYan, Zheng 			break;
161037c4efc1SYan, Zheng 	}
161137c4efc1SYan, Zheng 	spin_unlock(&mdsc->dentry_list_lock);
161237c4efc1SYan, Zheng 
161337c4efc1SYan, Zheng 	while (!list_empty(&dispose)) {
161437c4efc1SYan, Zheng 		di = list_first_entry(&dispose, struct ceph_dentry_info,
161537c4efc1SYan, Zheng 				      lease_list);
161637c4efc1SYan, Zheng 		dentry = di->dentry;
161737c4efc1SYan, Zheng 		spin_lock(&dentry->d_lock);
161837c4efc1SYan, Zheng 
161937c4efc1SYan, Zheng 		list_del_init(&di->lease_list);
162037c4efc1SYan, Zheng 		di->flags &= ~CEPH_DENTRY_SHRINK_LIST;
162137c4efc1SYan, Zheng 		if (di->flags & CEPH_DENTRY_REFERENCED) {
162237c4efc1SYan, Zheng 			spin_lock(&mdsc->dentry_list_lock);
162337c4efc1SYan, Zheng 			if (di->flags & CEPH_DENTRY_LEASE_LIST) {
162437c4efc1SYan, Zheng 				list_add_tail(&di->lease_list,
162537c4efc1SYan, Zheng 					      &mdsc->dentry_leases);
162637c4efc1SYan, Zheng 			} else {
162737c4efc1SYan, Zheng 				__dentry_dir_lease_touch(mdsc, di);
162837c4efc1SYan, Zheng 			}
162937c4efc1SYan, Zheng 			spin_unlock(&mdsc->dentry_list_lock);
163037c4efc1SYan, Zheng 		} else {
163137c4efc1SYan, Zheng 			freed++;
163237c4efc1SYan, Zheng 		}
163337c4efc1SYan, Zheng 
163437c4efc1SYan, Zheng 		spin_unlock(&dentry->d_lock);
163537c4efc1SYan, Zheng 		/* ceph_d_delete() does the trick */
163637c4efc1SYan, Zheng 		dput(dentry);
163737c4efc1SYan, Zheng 	}
163837c4efc1SYan, Zheng 	return freed;
163937c4efc1SYan, Zheng }
164037c4efc1SYan, Zheng 
__dentry_lease_check(struct dentry * dentry,void * arg)164137c4efc1SYan, Zheng static int __dentry_lease_check(struct dentry *dentry, void *arg)
164237c4efc1SYan, Zheng {
164337c4efc1SYan, Zheng 	struct ceph_dentry_info *di = ceph_dentry(dentry);
164437c4efc1SYan, Zheng 	int ret;
164537c4efc1SYan, Zheng 
164637c4efc1SYan, Zheng 	if (__dentry_lease_is_valid(di))
164737c4efc1SYan, Zheng 		return STOP;
164837c4efc1SYan, Zheng 	ret = __dir_lease_try_check(dentry);
164937c4efc1SYan, Zheng 	if (ret == -EBUSY)
165037c4efc1SYan, Zheng 		return KEEP;
165137c4efc1SYan, Zheng 	if (ret > 0)
165237c4efc1SYan, Zheng 		return TOUCH;
165337c4efc1SYan, Zheng 	return DELETE;
165437c4efc1SYan, Zheng }
165537c4efc1SYan, Zheng 
__dir_lease_check(struct dentry * dentry,void * arg)165637c4efc1SYan, Zheng static int __dir_lease_check(struct dentry *dentry, void *arg)
165737c4efc1SYan, Zheng {
165837c4efc1SYan, Zheng 	struct ceph_lease_walk_control *lwc = arg;
165937c4efc1SYan, Zheng 	struct ceph_dentry_info *di = ceph_dentry(dentry);
166037c4efc1SYan, Zheng 
166137c4efc1SYan, Zheng 	int ret = __dir_lease_try_check(dentry);
166237c4efc1SYan, Zheng 	if (ret == -EBUSY)
166337c4efc1SYan, Zheng 		return KEEP;
166437c4efc1SYan, Zheng 	if (ret > 0) {
166537c4efc1SYan, Zheng 		if (time_before(jiffies, di->time + lwc->dir_lease_ttl))
166637c4efc1SYan, Zheng 			return STOP;
166737c4efc1SYan, Zheng 		/* Move dentry to tail of dir lease list if we don't want
166837c4efc1SYan, Zheng 		 * to delete it. So dentries in the list are checked in a
166937c4efc1SYan, Zheng 		 * round robin manner */
1670fe33032dSYan, Zheng 		if (!lwc->expire_dir_lease)
167137c4efc1SYan, Zheng 			return TOUCH;
1672fe33032dSYan, Zheng 		if (dentry->d_lockref.count > 0 ||
1673fe33032dSYan, Zheng 		    (di->flags & CEPH_DENTRY_REFERENCED))
1674fe33032dSYan, Zheng 			return TOUCH;
1675fe33032dSYan, Zheng 		/* invalidate dir lease */
1676fe33032dSYan, Zheng 		di->lease_shared_gen = 0;
167737c4efc1SYan, Zheng 	}
167837c4efc1SYan, Zheng 	return DELETE;
167937c4efc1SYan, Zheng }
168037c4efc1SYan, Zheng 
ceph_trim_dentries(struct ceph_mds_client * mdsc)168137c4efc1SYan, Zheng int ceph_trim_dentries(struct ceph_mds_client *mdsc)
168237c4efc1SYan, Zheng {
168337c4efc1SYan, Zheng 	struct ceph_lease_walk_control lwc;
1684fe33032dSYan, Zheng 	unsigned long count;
168537c4efc1SYan, Zheng 	unsigned long freed;
168637c4efc1SYan, Zheng 
1687fe33032dSYan, Zheng 	spin_lock(&mdsc->caps_list_lock);
1688fe33032dSYan, Zheng         if (mdsc->caps_use_max > 0 &&
1689fe33032dSYan, Zheng             mdsc->caps_use_count > mdsc->caps_use_max)
1690fe33032dSYan, Zheng 		count = mdsc->caps_use_count - mdsc->caps_use_max;
1691fe33032dSYan, Zheng 	else
1692fe33032dSYan, Zheng 		count = 0;
1693fe33032dSYan, Zheng         spin_unlock(&mdsc->caps_list_lock);
1694fe33032dSYan, Zheng 
169537c4efc1SYan, Zheng 	lwc.dir_lease = false;
169637c4efc1SYan, Zheng 	lwc.nr_to_scan  = CEPH_CAPS_PER_RELEASE * 2;
169737c4efc1SYan, Zheng 	freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check);
169837c4efc1SYan, Zheng 	if (!lwc.nr_to_scan) /* more invalid leases */
169937c4efc1SYan, Zheng 		return -EAGAIN;
170037c4efc1SYan, Zheng 
170137c4efc1SYan, Zheng 	if (lwc.nr_to_scan < CEPH_CAPS_PER_RELEASE)
170237c4efc1SYan, Zheng 		lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE;
170337c4efc1SYan, Zheng 
170437c4efc1SYan, Zheng 	lwc.dir_lease = true;
1705fe33032dSYan, Zheng 	lwc.expire_dir_lease = freed < count;
1706fe33032dSYan, Zheng 	lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ;
170737c4efc1SYan, Zheng 	freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check);
170837c4efc1SYan, Zheng 	if (!lwc.nr_to_scan) /* more to check */
170937c4efc1SYan, Zheng 		return -EAGAIN;
171037c4efc1SYan, Zheng 
171137c4efc1SYan, Zheng 	return freed > 0 ? 1 : 0;
171237c4efc1SYan, Zheng }
171337c4efc1SYan, Zheng 
171437c4efc1SYan, Zheng /*
171581a6cf2dSSage Weil  * Ensure a dentry lease will no longer revalidate.
171681a6cf2dSSage Weil  */
ceph_invalidate_dentry_lease(struct dentry * dentry)171781a6cf2dSSage Weil void ceph_invalidate_dentry_lease(struct dentry *dentry)
171881a6cf2dSSage Weil {
171937c4efc1SYan, Zheng 	struct ceph_dentry_info *di = ceph_dentry(dentry);
172081a6cf2dSSage Weil 	spin_lock(&dentry->d_lock);
172137c4efc1SYan, Zheng 	di->time = jiffies;
172237c4efc1SYan, Zheng 	di->lease_shared_gen = 0;
1723f5e17aedSJeff Layton 	di->flags &= ~CEPH_DENTRY_PRIMARY_LINK;
172437c4efc1SYan, Zheng 	__dentry_lease_unlist(di);
172581a6cf2dSSage Weil 	spin_unlock(&dentry->d_lock);
172681a6cf2dSSage Weil }
17272817b000SSage Weil 
17282817b000SSage Weil /*
17292817b000SSage Weil  * Check if dentry lease is valid.  If not, delete the lease.  Try to
17302817b000SSage Weil  * renew if the least is more than half up.
17312817b000SSage Weil  */
__dentry_lease_is_valid(struct ceph_dentry_info * di)17321e9c2eb6SYan, Zheng static bool __dentry_lease_is_valid(struct ceph_dentry_info *di)
17331e9c2eb6SYan, Zheng {
17341e9c2eb6SYan, Zheng 	struct ceph_mds_session *session;
17351e9c2eb6SYan, Zheng 
17361e9c2eb6SYan, Zheng 	if (!di->lease_gen)
17371e9c2eb6SYan, Zheng 		return false;
17381e9c2eb6SYan, Zheng 
17391e9c2eb6SYan, Zheng 	session = di->lease_session;
17401e9c2eb6SYan, Zheng 	if (session) {
17411e9c2eb6SYan, Zheng 		u32 gen;
17421e9c2eb6SYan, Zheng 		unsigned long ttl;
17431e9c2eb6SYan, Zheng 
174452d60f8eSJeff Layton 		gen = atomic_read(&session->s_cap_gen);
17451e9c2eb6SYan, Zheng 		ttl = session->s_cap_ttl;
17461e9c2eb6SYan, Zheng 
17471e9c2eb6SYan, Zheng 		if (di->lease_gen == gen &&
17481e9c2eb6SYan, Zheng 		    time_before(jiffies, ttl) &&
17491e9c2eb6SYan, Zheng 		    time_before(jiffies, di->time))
17501e9c2eb6SYan, Zheng 			return true;
17511e9c2eb6SYan, Zheng 	}
17521e9c2eb6SYan, Zheng 	di->lease_gen = 0;
17531e9c2eb6SYan, Zheng 	return false;
17541e9c2eb6SYan, Zheng }
17551e9c2eb6SYan, Zheng 
dentry_lease_is_valid(struct dentry * dentry,unsigned int flags)17568f2a98efSYan, Zheng static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags)
17572817b000SSage Weil {
17582817b000SSage Weil 	struct ceph_dentry_info *di;
17592817b000SSage Weil 	struct ceph_mds_session *session = NULL;
17602817b000SSage Weil 	u32 seq = 0;
17611e9c2eb6SYan, Zheng 	int valid = 0;
17622817b000SSage Weil 
17632817b000SSage Weil 	spin_lock(&dentry->d_lock);
17642817b000SSage Weil 	di = ceph_dentry(dentry);
17651e9c2eb6SYan, Zheng 	if (di && __dentry_lease_is_valid(di)) {
17662817b000SSage Weil 		valid = 1;
17671e9c2eb6SYan, Zheng 
17682817b000SSage Weil 		if (di->lease_renew_after &&
17692817b000SSage Weil 		    time_after(jiffies, di->lease_renew_after)) {
177014fb9c9eSJeff Layton 			/*
177114fb9c9eSJeff Layton 			 * We should renew. If we're in RCU walk mode
177214fb9c9eSJeff Layton 			 * though, we can't do that so just return
177314fb9c9eSJeff Layton 			 * -ECHILD.
177414fb9c9eSJeff Layton 			 */
177514fb9c9eSJeff Layton 			if (flags & LOOKUP_RCU) {
177614fb9c9eSJeff Layton 				valid = -ECHILD;
177714fb9c9eSJeff Layton 			} else {
17781e9c2eb6SYan, Zheng 				session = ceph_get_mds_session(di->lease_session);
17792817b000SSage Weil 				seq = di->lease_seq;
17802817b000SSage Weil 				di->lease_renew_after = 0;
17812817b000SSage Weil 				di->lease_renew_from = jiffies;
17822817b000SSage Weil 			}
17832817b000SSage Weil 		}
17842817b000SSage Weil 	}
17852817b000SSage Weil 	spin_unlock(&dentry->d_lock);
17862817b000SSage Weil 
17872817b000SSage Weil 	if (session) {
17888f2a98efSYan, Zheng 		ceph_mdsc_lease_send_msg(session, dentry,
17892817b000SSage Weil 					 CEPH_MDS_LEASE_RENEW, seq);
17902817b000SSage Weil 		ceph_put_mds_session(session);
17912817b000SSage Weil 	}
17922817b000SSage Weil 	dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid);
17932817b000SSage Weil 	return valid;
17942817b000SSage Weil }
17952817b000SSage Weil 
17962817b000SSage Weil /*
17971e9c2eb6SYan, Zheng  * Called under dentry->d_lock.
17981e9c2eb6SYan, Zheng  */
__dir_lease_try_check(const struct dentry * dentry)17991e9c2eb6SYan, Zheng static int __dir_lease_try_check(const struct dentry *dentry)
18001e9c2eb6SYan, Zheng {
18011e9c2eb6SYan, Zheng 	struct ceph_dentry_info *di = ceph_dentry(dentry);
18021e9c2eb6SYan, Zheng 	struct inode *dir;
18031e9c2eb6SYan, Zheng 	struct ceph_inode_info *ci;
18041e9c2eb6SYan, Zheng 	int valid = 0;
18051e9c2eb6SYan, Zheng 
18061e9c2eb6SYan, Zheng 	if (!di->lease_shared_gen)
18071e9c2eb6SYan, Zheng 		return 0;
18081e9c2eb6SYan, Zheng 	if (IS_ROOT(dentry))
18091e9c2eb6SYan, Zheng 		return 0;
18101e9c2eb6SYan, Zheng 
18111e9c2eb6SYan, Zheng 	dir = d_inode(dentry->d_parent);
18121e9c2eb6SYan, Zheng 	ci = ceph_inode(dir);
18131e9c2eb6SYan, Zheng 
18141e9c2eb6SYan, Zheng 	if (spin_trylock(&ci->i_ceph_lock)) {
18151e9c2eb6SYan, Zheng 		if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen &&
18161e9c2eb6SYan, Zheng 		    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 0))
18171e9c2eb6SYan, Zheng 			valid = 1;
18181e9c2eb6SYan, Zheng 		spin_unlock(&ci->i_ceph_lock);
18191e9c2eb6SYan, Zheng 	} else {
18201e9c2eb6SYan, Zheng 		valid = -EBUSY;
18211e9c2eb6SYan, Zheng 	}
18221e9c2eb6SYan, Zheng 
18231e9c2eb6SYan, Zheng 	if (!valid)
18241e9c2eb6SYan, Zheng 		di->lease_shared_gen = 0;
18251e9c2eb6SYan, Zheng 	return valid;
18261e9c2eb6SYan, Zheng }
18271e9c2eb6SYan, Zheng 
18281e9c2eb6SYan, Zheng /*
18292817b000SSage Weil  * Check if directory-wide content lease/cap is valid.
18302817b000SSage Weil  */
dir_lease_is_valid(struct inode * dir,struct dentry * dentry,struct ceph_mds_client * mdsc)1831719a2514SYan, Zheng static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry,
1832719a2514SYan, Zheng 			      struct ceph_mds_client *mdsc)
18332817b000SSage Weil {
18342817b000SSage Weil 	struct ceph_inode_info *ci = ceph_inode(dir);
1835feab6ac2SYan, Zheng 	int valid;
1836feab6ac2SYan, Zheng 	int shared_gen;
18372817b000SSage Weil 
1838be655596SSage Weil 	spin_lock(&ci->i_ceph_lock);
18392817b000SSage Weil 	valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
1840719a2514SYan, Zheng 	if (valid) {
1841719a2514SYan, Zheng 		__ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD);
1842feab6ac2SYan, Zheng 		shared_gen = atomic_read(&ci->i_shared_gen);
1843719a2514SYan, Zheng 	}
1844be655596SSage Weil 	spin_unlock(&ci->i_ceph_lock);
1845feab6ac2SYan, Zheng 	if (valid) {
1846feab6ac2SYan, Zheng 		struct ceph_dentry_info *di;
1847feab6ac2SYan, Zheng 		spin_lock(&dentry->d_lock);
1848feab6ac2SYan, Zheng 		di = ceph_dentry(dentry);
1849feab6ac2SYan, Zheng 		if (dir == d_inode(dentry->d_parent) &&
1850feab6ac2SYan, Zheng 		    di && di->lease_shared_gen == shared_gen)
185137c4efc1SYan, Zheng 			__ceph_dentry_dir_lease_touch(di);
1852feab6ac2SYan, Zheng 		else
1853feab6ac2SYan, Zheng 			valid = 0;
1854feab6ac2SYan, Zheng 		spin_unlock(&dentry->d_lock);
1855feab6ac2SYan, Zheng 	}
1856feab6ac2SYan, Zheng 	dout("dir_lease_is_valid dir %p v%u dentry %p = %d\n",
1857feab6ac2SYan, Zheng 	     dir, (unsigned)atomic_read(&ci->i_shared_gen), dentry, valid);
18582817b000SSage Weil 	return valid;
18592817b000SSage Weil }
18602817b000SSage Weil 
18612817b000SSage Weil /*
18622817b000SSage Weil  * Check if cached dentry can be trusted.
18632817b000SSage Weil  */
ceph_d_revalidate(struct dentry * dentry,unsigned int flags)18640b728e19SAl Viro static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
18652817b000SSage Weil {
1866bf1c6acaSSage Weil 	int valid = 0;
1867641235d8SYan, Zheng 	struct dentry *parent;
1868aa8dd816SAl Viro 	struct inode *dir, *inode;
1869719a2514SYan, Zheng 	struct ceph_mds_client *mdsc;
187034286d66SNick Piggin 
1871c5267601SJeff Layton 	valid = fscrypt_d_revalidate(dentry, flags);
1872c5267601SJeff Layton 	if (valid <= 0)
1873c5267601SJeff Layton 		return valid;
1874c5267601SJeff Layton 
1875f49d1e05SJeff Layton 	if (flags & LOOKUP_RCU) {
187652953d55SSeraphime Kirkovski 		parent = READ_ONCE(dentry->d_parent);
1877f49d1e05SJeff Layton 		dir = d_inode_rcu(parent);
1878f49d1e05SJeff Layton 		if (!dir)
187934286d66SNick Piggin 			return -ECHILD;
1880aa8dd816SAl Viro 		inode = d_inode_rcu(dentry);
1881f49d1e05SJeff Layton 	} else {
1882f49d1e05SJeff Layton 		parent = dget_parent(dentry);
1883f49d1e05SJeff Layton 		dir = d_inode(parent);
1884aa8dd816SAl Viro 		inode = d_inode(dentry);
1885f49d1e05SJeff Layton 	}
188634286d66SNick Piggin 
1887c5267601SJeff Layton 	dout("d_revalidate %p '%pd' inode %p offset 0x%llx nokey %d\n", dentry,
1888c5267601SJeff Layton 	     dentry, inode, ceph_dentry(dentry)->offset,
1889c5267601SJeff Layton 	     !!(dentry->d_flags & DCACHE_NOKEY_NAME));
18902817b000SSage Weil 
1891*985b9ee8SXiubo Li 	mdsc = ceph_sb_to_fs_client(dir->i_sb)->mdsc;
1892719a2514SYan, Zheng 
18932817b000SSage Weil 	/* always trust cached snapped dentries, snapdir dentry */
18942817b000SSage Weil 	if (ceph_snap(dir) != CEPH_NOSNAP) {
1895a455589fSAl Viro 		dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry,
1896aa8dd816SAl Viro 		     dentry, inode);
1897bf1c6acaSSage Weil 		valid = 1;
1898aa8dd816SAl Viro 	} else if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
1899bf1c6acaSSage Weil 		valid = 1;
190014fb9c9eSJeff Layton 	} else {
19018f2a98efSYan, Zheng 		valid = dentry_lease_is_valid(dentry, flags);
190214fb9c9eSJeff Layton 		if (valid == -ECHILD)
190314fb9c9eSJeff Layton 			return valid;
1904719a2514SYan, Zheng 		if (valid || dir_lease_is_valid(dir, dentry, mdsc)) {
1905aa8dd816SAl Viro 			if (inode)
1906aa8dd816SAl Viro 				valid = ceph_is_any_caps(inode);
19079215aeeaSYan, Zheng 			else
1908bf1c6acaSSage Weil 				valid = 1;
19092817b000SSage Weil 		}
191014fb9c9eSJeff Layton 	}
19112817b000SSage Weil 
1912200fd27cSYan, Zheng 	if (!valid) {
1913200fd27cSYan, Zheng 		struct ceph_mds_request *req;
19141097680dSJeff Layton 		int op, err;
19151097680dSJeff Layton 		u32 mask;
1916200fd27cSYan, Zheng 
1917f49d1e05SJeff Layton 		if (flags & LOOKUP_RCU)
1918f49d1e05SJeff Layton 			return -ECHILD;
1919f49d1e05SJeff Layton 
1920f9009efaSXiubo Li 		percpu_counter_inc(&mdsc->metric.d_lease_mis);
1921f9009efaSXiubo Li 
1922200fd27cSYan, Zheng 		op = ceph_snap(dir) == CEPH_SNAPDIR ?
19235eb9f604SJeff Layton 			CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
1924200fd27cSYan, Zheng 		req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
1925200fd27cSYan, Zheng 		if (!IS_ERR(req)) {
1926200fd27cSYan, Zheng 			req->r_dentry = dget(dentry);
19275eb9f604SJeff Layton 			req->r_num_caps = 2;
19285eb9f604SJeff Layton 			req->r_parent = dir;
19294c183472SJeff Layton 			ihold(dir);
1930200fd27cSYan, Zheng 
1931200fd27cSYan, Zheng 			mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
1932200fd27cSYan, Zheng 			if (ceph_security_xattr_wanted(dir))
1933200fd27cSYan, Zheng 				mask |= CEPH_CAP_XATTR_SHARED;
19341097680dSJeff Layton 			req->r_args.getattr.mask = cpu_to_le32(mask);
1935200fd27cSYan, Zheng 
1936200fd27cSYan, Zheng 			err = ceph_mdsc_do_request(mdsc, NULL, req);
1937c3f4688aSJeff Layton 			switch (err) {
1938c3f4688aSJeff Layton 			case 0:
1939c3f4688aSJeff Layton 				if (d_really_is_positive(dentry) &&
1940c3f4688aSJeff Layton 				    d_inode(dentry) == req->r_target_inode)
1941c3f4688aSJeff Layton 					valid = 1;
1942c3f4688aSJeff Layton 				break;
1943c3f4688aSJeff Layton 			case -ENOENT:
1944c3f4688aSJeff Layton 				if (d_really_is_negative(dentry))
1945c3f4688aSJeff Layton 					valid = 1;
1946df561f66SGustavo A. R. Silva 				fallthrough;
1947c3f4688aSJeff Layton 			default:
1948c3f4688aSJeff Layton 				break;
1949200fd27cSYan, Zheng 			}
1950200fd27cSYan, Zheng 			ceph_mdsc_put_request(req);
1951200fd27cSYan, Zheng 			dout("d_revalidate %p lookup result=%d\n",
1952200fd27cSYan, Zheng 			     dentry, err);
1953200fd27cSYan, Zheng 		}
1954f9009efaSXiubo Li 	} else {
1955f9009efaSXiubo Li 		percpu_counter_inc(&mdsc->metric.d_lease_hit);
1956200fd27cSYan, Zheng 	}
1957200fd27cSYan, Zheng 
1958bf1c6acaSSage Weil 	dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
195937c4efc1SYan, Zheng 	if (!valid)
19609215aeeaSYan, Zheng 		ceph_dir_clear_complete(dir);
1961641235d8SYan, Zheng 
1962f49d1e05SJeff Layton 	if (!(flags & LOOKUP_RCU))
1963641235d8SYan, Zheng 		dput(parent);
1964bf1c6acaSSage Weil 	return valid;
19652817b000SSage Weil }
19662817b000SSage Weil 
19672817b000SSage Weil /*
19681e9c2eb6SYan, Zheng  * Delete unused dentry that doesn't have valid lease
19691e9c2eb6SYan, Zheng  *
19701e9c2eb6SYan, Zheng  * Called under dentry->d_lock.
19711e9c2eb6SYan, Zheng  */
ceph_d_delete(const struct dentry * dentry)19721e9c2eb6SYan, Zheng static int ceph_d_delete(const struct dentry *dentry)
19731e9c2eb6SYan, Zheng {
19741e9c2eb6SYan, Zheng 	struct ceph_dentry_info *di;
19751e9c2eb6SYan, Zheng 
19761e9c2eb6SYan, Zheng 	/* won't release caps */
19771e9c2eb6SYan, Zheng 	if (d_really_is_negative(dentry))
19781e9c2eb6SYan, Zheng 		return 0;
19791e9c2eb6SYan, Zheng 	if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
19801e9c2eb6SYan, Zheng 		return 0;
19811e9c2eb6SYan, Zheng 	/* vaild lease? */
19821e9c2eb6SYan, Zheng 	di = ceph_dentry(dentry);
19831e9c2eb6SYan, Zheng 	if (di) {
19841e9c2eb6SYan, Zheng 		if (__dentry_lease_is_valid(di))
19851e9c2eb6SYan, Zheng 			return 0;
19861e9c2eb6SYan, Zheng 		if (__dir_lease_try_check(dentry))
19871e9c2eb6SYan, Zheng 			return 0;
19881e9c2eb6SYan, Zheng 	}
19891e9c2eb6SYan, Zheng 	return 1;
19901e9c2eb6SYan, Zheng }
19911e9c2eb6SYan, Zheng 
19921e9c2eb6SYan, Zheng /*
1993147851d2SSage Weil  * Release our ceph_dentry_info.
19942817b000SSage Weil  */
ceph_d_release(struct dentry * dentry)1995147851d2SSage Weil static void ceph_d_release(struct dentry *dentry)
19962817b000SSage Weil {
19972817b000SSage Weil 	struct ceph_dentry_info *di = ceph_dentry(dentry);
1998*985b9ee8SXiubo Li 	struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb);
19992817b000SSage Weil 
2000147851d2SSage Weil 	dout("d_release %p\n", dentry);
20015b484a51SJeff Layton 
2002f9009efaSXiubo Li 	atomic64_dec(&fsc->mdsc->metric.total_dentries);
2003f9009efaSXiubo Li 
20045b484a51SJeff Layton 	spin_lock(&dentry->d_lock);
200537c4efc1SYan, Zheng 	__dentry_lease_unlist(di);
20065b484a51SJeff Layton 	dentry->d_fsdata = NULL;
20075b484a51SJeff Layton 	spin_unlock(&dentry->d_lock);
20085b484a51SJeff Layton 
20092817b000SSage Weil 	ceph_put_mds_session(di->lease_session);
20102817b000SSage Weil 	kmem_cache_free(ceph_dentry_cachep, di);
20112817b000SSage Weil }
20122817b000SSage Weil 
2013b58dc410SSage Weil /*
2014b58dc410SSage Weil  * When the VFS prunes a dentry from the cache, we need to clear the
2015b58dc410SSage Weil  * complete flag on the parent directory.
2016b58dc410SSage Weil  *
2017b58dc410SSage Weil  * Called under dentry->d_lock.
2018b58dc410SSage Weil  */
ceph_d_prune(struct dentry * dentry)2019b58dc410SSage Weil static void ceph_d_prune(struct dentry *dentry)
2020b58dc410SSage Weil {
20215495c2d0SYan, Zheng 	struct ceph_inode_info *dir_ci;
20225495c2d0SYan, Zheng 	struct ceph_dentry_info *di;
20235495c2d0SYan, Zheng 
20245495c2d0SYan, Zheng 	dout("ceph_d_prune %pd %p\n", dentry, dentry);
2025b58dc410SSage Weil 
2026b58dc410SSage Weil 	/* do we have a valid parent? */
20278842b3beSSage Weil 	if (IS_ROOT(dentry))
2028b58dc410SSage Weil 		return;
2029b58dc410SSage Weil 
20305495c2d0SYan, Zheng 	/* we hold d_lock, so d_parent is stable */
20315495c2d0SYan, Zheng 	dir_ci = ceph_inode(d_inode(dentry->d_parent));
20325495c2d0SYan, Zheng 	if (dir_ci->i_vino.snap == CEPH_SNAPDIR)
2033b58dc410SSage Weil 		return;
2034b58dc410SSage Weil 
20355495c2d0SYan, Zheng 	/* who calls d_delete() should also disable dcache readdir */
20365495c2d0SYan, Zheng 	if (d_really_is_negative(dentry))
203718fc8abdSAl Viro 		return;
203818fc8abdSAl Viro 
20395495c2d0SYan, Zheng 	/* d_fsdata does not get cleared until d_release */
20405495c2d0SYan, Zheng 	if (!d_unhashed(dentry)) {
20415495c2d0SYan, Zheng 		__ceph_dir_clear_complete(dir_ci);
20425495c2d0SYan, Zheng 		return;
20435495c2d0SYan, Zheng 	}
20445495c2d0SYan, Zheng 
20455495c2d0SYan, Zheng 	/* Disable dcache readdir just in case that someone called d_drop()
20465495c2d0SYan, Zheng 	 * or d_invalidate(), but MDS didn't revoke CEPH_CAP_FILE_SHARED
20475495c2d0SYan, Zheng 	 * properly (dcache readdir is still enabled) */
20485495c2d0SYan, Zheng 	di = ceph_dentry(dentry);
20495495c2d0SYan, Zheng 	if (di->offset > 0 &&
20505495c2d0SYan, Zheng 	    di->lease_shared_gen == atomic_read(&dir_ci->i_shared_gen))
20515495c2d0SYan, Zheng 		__ceph_dir_clear_ordered(dir_ci);
2052b58dc410SSage Weil }
20532817b000SSage Weil 
20542817b000SSage Weil /*
20552817b000SSage Weil  * read() on a dir.  This weird interface hack only works if mounted
20562817b000SSage Weil  * with '-o dirstat'.
20572817b000SSage Weil  */
ceph_read_dir(struct file * file,char __user * buf,size_t size,loff_t * ppos)20582817b000SSage Weil static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
20592817b000SSage Weil 			     loff_t *ppos)
20602817b000SSage Weil {
2061bb48bd4dSChengguang Xu 	struct ceph_dir_file_info *dfi = file->private_data;
2062496ad9aaSAl Viro 	struct inode *inode = file_inode(file);
20632817b000SSage Weil 	struct ceph_inode_info *ci = ceph_inode(inode);
20642817b000SSage Weil 	int left;
2065ae598083SSage Weil 	const int bufsize = 1024;
20662817b000SSage Weil 
2067*985b9ee8SXiubo Li 	if (!ceph_test_mount_opt(ceph_sb_to_fs_client(inode->i_sb), DIRSTAT))
20682817b000SSage Weil 		return -EISDIR;
20692817b000SSage Weil 
2070bb48bd4dSChengguang Xu 	if (!dfi->dir_info) {
2071bb48bd4dSChengguang Xu 		dfi->dir_info = kmalloc(bufsize, GFP_KERNEL);
2072bb48bd4dSChengguang Xu 		if (!dfi->dir_info)
20732817b000SSage Weil 			return -ENOMEM;
2074bb48bd4dSChengguang Xu 		dfi->dir_info_len =
2075bb48bd4dSChengguang Xu 			snprintf(dfi->dir_info, bufsize,
20762817b000SSage Weil 				"entries:   %20lld\n"
20772817b000SSage Weil 				" files:    %20lld\n"
20782817b000SSage Weil 				" subdirs:  %20lld\n"
20792817b000SSage Weil 				"rentries:  %20lld\n"
20802817b000SSage Weil 				" rfiles:   %20lld\n"
20812817b000SSage Weil 				" rsubdirs: %20lld\n"
20822817b000SSage Weil 				"rbytes:    %20lld\n"
20839bbeab41SArnd Bergmann 				"rctime:    %10lld.%09ld\n",
20842817b000SSage Weil 				ci->i_files + ci->i_subdirs,
20852817b000SSage Weil 				ci->i_files,
20862817b000SSage Weil 				ci->i_subdirs,
20872817b000SSage Weil 				ci->i_rfiles + ci->i_rsubdirs,
20882817b000SSage Weil 				ci->i_rfiles,
20892817b000SSage Weil 				ci->i_rsubdirs,
20902817b000SSage Weil 				ci->i_rbytes,
20919bbeab41SArnd Bergmann 				ci->i_rctime.tv_sec,
20929bbeab41SArnd Bergmann 				ci->i_rctime.tv_nsec);
20932817b000SSage Weil 	}
20942817b000SSage Weil 
2095bb48bd4dSChengguang Xu 	if (*ppos >= dfi->dir_info_len)
20962817b000SSage Weil 		return 0;
2097bb48bd4dSChengguang Xu 	size = min_t(unsigned, size, dfi->dir_info_len-*ppos);
2098bb48bd4dSChengguang Xu 	left = copy_to_user(buf, dfi->dir_info + *ppos, size);
20992817b000SSage Weil 	if (left == size)
21002817b000SSage Weil 		return -EFAULT;
21012817b000SSage Weil 	*ppos += (size - left);
21022817b000SSage Weil 	return size - left;
21032817b000SSage Weil }
21042817b000SSage Weil 
21052817b000SSage Weil 
21062817b000SSage Weil 
21076c0f3af7SSage Weil /*
21086c0f3af7SSage Weil  * Return name hash for a given dentry.  This is dependent on
21096c0f3af7SSage Weil  * the parent directory's hash function.
21106c0f3af7SSage Weil  */
ceph_dentry_hash(struct inode * dir,struct dentry * dn)2111e5f86dc3SSage Weil unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
21126c0f3af7SSage Weil {
21136c0f3af7SSage Weil 	struct ceph_inode_info *dci = ceph_inode(dir);
211476a495d6SJeff Layton 	unsigned hash;
21156c0f3af7SSage Weil 
21166c0f3af7SSage Weil 	switch (dci->i_dir_layout.dl_dir_hash) {
21176c0f3af7SSage Weil 	case 0:	/* for backward compat */
21186c0f3af7SSage Weil 	case CEPH_STR_HASH_LINUX:
21196c0f3af7SSage Weil 		return dn->d_name.hash;
21206c0f3af7SSage Weil 
21216c0f3af7SSage Weil 	default:
212276a495d6SJeff Layton 		spin_lock(&dn->d_lock);
212376a495d6SJeff Layton 		hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
21246c0f3af7SSage Weil 				     dn->d_name.name, dn->d_name.len);
212576a495d6SJeff Layton 		spin_unlock(&dn->d_lock);
212676a495d6SJeff Layton 		return hash;
21276c0f3af7SSage Weil 	}
21286c0f3af7SSage Weil }
21296c0f3af7SSage Weil 
21303e327154SLinus Torvalds WRAP_DIR_ITER(ceph_readdir) // FIXME!
21312817b000SSage Weil const struct file_operations ceph_dir_fops = {
21322817b000SSage Weil 	.read = ceph_read_dir,
21333e327154SLinus Torvalds 	.iterate_shared = shared_ceph_readdir,
21342817b000SSage Weil 	.llseek = ceph_dir_llseek,
21352817b000SSage Weil 	.open = ceph_open,
21362817b000SSage Weil 	.release = ceph_release,
21372817b000SSage Weil 	.unlocked_ioctl = ceph_ioctl,
213818bd6caaSArnd Bergmann 	.compat_ioctl = compat_ptr_ioctl,
2139da819c81SYan, Zheng 	.fsync = ceph_fsync,
2140597817ddSYan, Zheng 	.lock = ceph_lock,
2141597817ddSYan, Zheng 	.flock = ceph_flock,
21422817b000SSage Weil };
21432817b000SSage Weil 
214438c48b5fSYan, Zheng const struct file_operations ceph_snapdir_fops = {
21453e327154SLinus Torvalds 	.iterate_shared = shared_ceph_readdir,
214638c48b5fSYan, Zheng 	.llseek = ceph_dir_llseek,
214738c48b5fSYan, Zheng 	.open = ceph_open,
214838c48b5fSYan, Zheng 	.release = ceph_release,
214938c48b5fSYan, Zheng };
215038c48b5fSYan, Zheng 
21512817b000SSage Weil const struct inode_operations ceph_dir_iops = {
21522817b000SSage Weil 	.lookup = ceph_lookup,
21532817b000SSage Weil 	.permission = ceph_permission,
21542817b000SSage Weil 	.getattr = ceph_getattr,
21552817b000SSage Weil 	.setattr = ceph_setattr,
21562817b000SSage Weil 	.listxattr = ceph_listxattr,
2157cac2f8b8SChristian Brauner 	.get_inode_acl = ceph_get_acl,
215872466d0bSSage Weil 	.set_acl = ceph_set_acl,
21592817b000SSage Weil 	.mknod = ceph_mknod,
21602817b000SSage Weil 	.symlink = ceph_symlink,
21612817b000SSage Weil 	.mkdir = ceph_mkdir,
21622817b000SSage Weil 	.link = ceph_link,
21632817b000SSage Weil 	.unlink = ceph_unlink,
21642817b000SSage Weil 	.rmdir = ceph_unlink,
21652817b000SSage Weil 	.rename = ceph_rename,
21662817b000SSage Weil 	.create = ceph_create,
21672d83bde9SMiklos Szeredi 	.atomic_open = ceph_atomic_open,
21682817b000SSage Weil };
21692817b000SSage Weil 
217038c48b5fSYan, Zheng const struct inode_operations ceph_snapdir_iops = {
217138c48b5fSYan, Zheng 	.lookup = ceph_lookup,
217238c48b5fSYan, Zheng 	.permission = ceph_permission,
217338c48b5fSYan, Zheng 	.getattr = ceph_getattr,
217438c48b5fSYan, Zheng 	.mkdir = ceph_mkdir,
217538c48b5fSYan, Zheng 	.rmdir = ceph_unlink,
21760ea611a3SYan, Zheng 	.rename = ceph_rename,
217738c48b5fSYan, Zheng };
217838c48b5fSYan, Zheng 
217952dfb8acSSage Weil const struct dentry_operations ceph_dentry_ops = {
21802817b000SSage Weil 	.d_revalidate = ceph_d_revalidate,
21811e9c2eb6SYan, Zheng 	.d_delete = ceph_d_delete,
2182147851d2SSage Weil 	.d_release = ceph_d_release,
2183b58dc410SSage Weil 	.d_prune = ceph_d_prune,
2184ad5cb123SAl Viro 	.d_init = ceph_d_init,
21852817b000SSage Weil };
2186