xref: /openbmc/linux/fs/orangefs/dir.c (revision 8092895f)
15db11c21SMike Marshall /*
25db11c21SMike Marshall  * (C) 2001 Clemson University and The University of Chicago
35db11c21SMike Marshall  *
45db11c21SMike Marshall  * See COPYING in top-level directory.
55db11c21SMike Marshall  */
65db11c21SMike Marshall 
75db11c21SMike Marshall #include "protocol.h"
85db11c21SMike Marshall #include "pvfs2-kernel.h"
95db11c21SMike Marshall #include "pvfs2-bufmap.h"
105db11c21SMike Marshall 
115db11c21SMike Marshall struct readdir_handle_s {
125db11c21SMike Marshall 	int buffer_index;
135db11c21SMike Marshall 	struct pvfs2_readdir_response_s readdir_response;
145db11c21SMike Marshall 	void *dents_buf;
155db11c21SMike Marshall };
165db11c21SMike Marshall 
175db11c21SMike Marshall /*
185db11c21SMike Marshall  * decode routine needed by kmod to make sense of the shared page for readdirs.
195db11c21SMike Marshall  */
208092895fSAl Viro static long decode_dirents(char *ptr, size_t size,
218092895fSAl Viro 			   struct pvfs2_readdir_response_s *readdir)
225db11c21SMike Marshall {
235db11c21SMike Marshall 	int i;
245db11c21SMike Marshall 	struct pvfs2_readdir_response_s *rd =
255db11c21SMike Marshall 		(struct pvfs2_readdir_response_s *) ptr;
265db11c21SMike Marshall 	char *buf = ptr;
275db11c21SMike Marshall 
288092895fSAl Viro 	if (size < offsetof(struct pvfs2_readdir_response_s, dirent_array))
298092895fSAl Viro 		return -EINVAL;
308092895fSAl Viro 
315db11c21SMike Marshall 	readdir->token = rd->token;
325db11c21SMike Marshall 	readdir->pvfs_dirent_outcount = rd->pvfs_dirent_outcount;
33ef4af94eSAl Viro 	readdir->dirent_array = kcalloc(readdir->pvfs_dirent_outcount,
345db11c21SMike Marshall 					sizeof(*readdir->dirent_array),
355db11c21SMike Marshall 					GFP_KERNEL);
365db11c21SMike Marshall 	if (readdir->dirent_array == NULL)
375db11c21SMike Marshall 		return -ENOMEM;
388092895fSAl Viro 
399be68b08SAl Viro 	buf += offsetof(struct pvfs2_readdir_response_s, dirent_array);
408092895fSAl Viro 	size -= offsetof(struct pvfs2_readdir_response_s, dirent_array);
418092895fSAl Viro 
425db11c21SMike Marshall 	for (i = 0; i < readdir->pvfs_dirent_outcount; i++) {
438092895fSAl Viro 		__u32 len;
448092895fSAl Viro 
458092895fSAl Viro 		if (size < 4)
468092895fSAl Viro 			goto Einval;
478092895fSAl Viro 
488092895fSAl Viro 		len = *(__u32 *)buf;
498092895fSAl Viro 		if (len >= (unsigned)-24)
508092895fSAl Viro 			goto Einval;
518092895fSAl Viro 
529be68b08SAl Viro 		readdir->dirent_array[i].d_name = buf + 4;
539be68b08SAl Viro 		readdir->dirent_array[i].d_length = len;
548092895fSAl Viro 
558092895fSAl Viro 		len = roundup8(4 + len + 1);
568092895fSAl Viro 		if (size < len + 16)
578092895fSAl Viro 			goto Einval;
588092895fSAl Viro 		size -= len + 16;
598092895fSAl Viro 
608092895fSAl Viro 		buf += len;
618092895fSAl Viro 
625db11c21SMike Marshall 		readdir->dirent_array[i].khandle =
639be68b08SAl Viro 			*(struct pvfs2_khandle *) buf;
649be68b08SAl Viro 		buf += 16;
655db11c21SMike Marshall 	}
669be68b08SAl Viro 	return buf - ptr;
678092895fSAl Viro Einval:
688092895fSAl Viro 	kfree(readdir->dirent_array);
698092895fSAl Viro 	readdir->dirent_array = NULL;
708092895fSAl Viro 	return -EINVAL;
715db11c21SMike Marshall }
725db11c21SMike Marshall 
735db11c21SMike Marshall static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf,
748092895fSAl Viro 				size_t size, int buffer_index)
755db11c21SMike Marshall {
765db11c21SMike Marshall 	long ret;
775db11c21SMike Marshall 
785db11c21SMike Marshall 	if (buf == NULL) {
795db11c21SMike Marshall 		gossip_err
805db11c21SMike Marshall 		    ("Invalid NULL buffer specified in readdir_handle_ctor\n");
815db11c21SMike Marshall 		return -ENOMEM;
825db11c21SMike Marshall 	}
835db11c21SMike Marshall 	if (buffer_index < 0) {
845db11c21SMike Marshall 		gossip_err
855db11c21SMike Marshall 		    ("Invalid buffer index specified in readdir_handle_ctor\n");
865db11c21SMike Marshall 		return -EINVAL;
875db11c21SMike Marshall 	}
885db11c21SMike Marshall 	rhandle->buffer_index = buffer_index;
895db11c21SMike Marshall 	rhandle->dents_buf = buf;
908092895fSAl Viro 	ret = decode_dirents(buf, size, &rhandle->readdir_response);
915db11c21SMike Marshall 	if (ret < 0) {
925db11c21SMike Marshall 		gossip_err("Could not decode readdir from buffer %ld\n", ret);
935db11c21SMike Marshall 		rhandle->buffer_index = -1;
945db11c21SMike Marshall 		gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf);
955db11c21SMike Marshall 		vfree(buf);
965db11c21SMike Marshall 		rhandle->dents_buf = NULL;
975db11c21SMike Marshall 	}
985db11c21SMike Marshall 	return ret;
995db11c21SMike Marshall }
1005db11c21SMike Marshall 
1015db11c21SMike Marshall static void readdir_handle_dtor(struct pvfs2_bufmap *bufmap,
1025db11c21SMike Marshall 		struct readdir_handle_s *rhandle)
1035db11c21SMike Marshall {
1045db11c21SMike Marshall 	if (rhandle == NULL)
1055db11c21SMike Marshall 		return;
1065db11c21SMike Marshall 
1075db11c21SMike Marshall 	/* kfree(NULL) is safe */
1085db11c21SMike Marshall 	kfree(rhandle->readdir_response.dirent_array);
1095db11c21SMike Marshall 	rhandle->readdir_response.dirent_array = NULL;
1105db11c21SMike Marshall 
1115db11c21SMike Marshall 	if (rhandle->buffer_index >= 0) {
1125db11c21SMike Marshall 		readdir_index_put(bufmap, rhandle->buffer_index);
1135db11c21SMike Marshall 		rhandle->buffer_index = -1;
1145db11c21SMike Marshall 	}
1155db11c21SMike Marshall 	if (rhandle->dents_buf) {
1165db11c21SMike Marshall 		gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n",
1175db11c21SMike Marshall 			     rhandle->dents_buf);
1185db11c21SMike Marshall 		vfree(rhandle->dents_buf);
1195db11c21SMike Marshall 		rhandle->dents_buf = NULL;
1205db11c21SMike Marshall 	}
1215db11c21SMike Marshall }
1225db11c21SMike Marshall 
1235db11c21SMike Marshall /*
1245db11c21SMike Marshall  * Read directory entries from an instance of an open directory.
1255db11c21SMike Marshall  */
1265db11c21SMike Marshall static int pvfs2_readdir(struct file *file, struct dir_context *ctx)
1275db11c21SMike Marshall {
1285db11c21SMike Marshall 	struct pvfs2_bufmap *bufmap = NULL;
1295db11c21SMike Marshall 	int ret = 0;
1305db11c21SMike Marshall 	int buffer_index;
13188309aaeSMike Marshall 	/*
13288309aaeSMike Marshall 	 * ptoken supports Orangefs' distributed directory logic, added
13388309aaeSMike Marshall 	 * in 2.9.2.
13488309aaeSMike Marshall 	 */
1355db11c21SMike Marshall 	__u64 *ptoken = file->private_data;
1365db11c21SMike Marshall 	__u64 pos = 0;
1375db11c21SMike Marshall 	ino_t ino = 0;
1385db11c21SMike Marshall 	struct dentry *dentry = file->f_path.dentry;
1395db11c21SMike Marshall 	struct pvfs2_kernel_op_s *new_op = NULL;
1405db11c21SMike Marshall 	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(dentry->d_inode);
1415db11c21SMike Marshall 	int buffer_full = 0;
1425db11c21SMike Marshall 	struct readdir_handle_s rhandle;
1435db11c21SMike Marshall 	int i = 0;
1445db11c21SMike Marshall 	int len = 0;
1455db11c21SMike Marshall 	ino_t current_ino = 0;
1465db11c21SMike Marshall 	char *current_entry = NULL;
1475db11c21SMike Marshall 	long bytes_decoded;
1485db11c21SMike Marshall 
14988309aaeSMike Marshall 	gossip_debug(GOSSIP_DIR_DEBUG,
15088309aaeSMike Marshall 		     "%s: ctx->pos:%lld, ptoken = %llu\n",
1515db11c21SMike Marshall 		     __func__,
1525db11c21SMike Marshall 		     lld(ctx->pos),
1535db11c21SMike Marshall 		     llu(*ptoken));
1545db11c21SMike Marshall 
1555db11c21SMike Marshall 	pos = (__u64) ctx->pos;
1565db11c21SMike Marshall 
1575db11c21SMike Marshall 	/* are we done? */
1585db11c21SMike Marshall 	if (pos == PVFS_READDIR_END) {
1595db11c21SMike Marshall 		gossip_debug(GOSSIP_DIR_DEBUG,
1605db11c21SMike Marshall 			     "Skipping to termination path\n");
1615db11c21SMike Marshall 		return 0;
1625db11c21SMike Marshall 	}
1635db11c21SMike Marshall 
1645db11c21SMike Marshall 	gossip_debug(GOSSIP_DIR_DEBUG,
1655db11c21SMike Marshall 		     "pvfs2_readdir called on %s (pos=%llu)\n",
1665db11c21SMike Marshall 		     dentry->d_name.name, llu(pos));
1675db11c21SMike Marshall 
1685db11c21SMike Marshall 	rhandle.buffer_index = -1;
1695db11c21SMike Marshall 	rhandle.dents_buf = NULL;
1705db11c21SMike Marshall 	memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response));
1715db11c21SMike Marshall 
1725db11c21SMike Marshall 	new_op = op_alloc(PVFS2_VFS_OP_READDIR);
1735db11c21SMike Marshall 	if (!new_op)
1745db11c21SMike Marshall 		return -ENOMEM;
1755db11c21SMike Marshall 
1765db11c21SMike Marshall 	new_op->uses_shared_memory = 1;
1775db11c21SMike Marshall 	new_op->upcall.req.readdir.refn = pvfs2_inode->refn;
1785db11c21SMike Marshall 	new_op->upcall.req.readdir.max_dirent_count = MAX_DIRENT_COUNT_READDIR;
1795db11c21SMike Marshall 
1805db11c21SMike Marshall 	gossip_debug(GOSSIP_DIR_DEBUG,
1815db11c21SMike Marshall 		     "%s: upcall.req.readdir.refn.khandle: %pU\n",
1825db11c21SMike Marshall 		     __func__,
1835db11c21SMike Marshall 		     &new_op->upcall.req.readdir.refn.khandle);
1845db11c21SMike Marshall 
1855db11c21SMike Marshall 	new_op->upcall.req.readdir.token = *ptoken;
1865db11c21SMike Marshall 
1875db11c21SMike Marshall get_new_buffer_index:
1885db11c21SMike Marshall 	ret = readdir_index_get(&bufmap, &buffer_index);
1895db11c21SMike Marshall 	if (ret < 0) {
1905db11c21SMike Marshall 		gossip_lerr("pvfs2_readdir: readdir_index_get() failure (%d)\n",
1915db11c21SMike Marshall 			    ret);
1925db11c21SMike Marshall 		goto out_free_op;
1935db11c21SMike Marshall 	}
1945db11c21SMike Marshall 	new_op->upcall.req.readdir.buf_index = buffer_index;
1955db11c21SMike Marshall 
1965db11c21SMike Marshall 	ret = service_operation(new_op,
1975db11c21SMike Marshall 				"pvfs2_readdir",
1985db11c21SMike Marshall 				get_interruptible_flag(dentry->d_inode));
1995db11c21SMike Marshall 
2005db11c21SMike Marshall 	gossip_debug(GOSSIP_DIR_DEBUG,
2015db11c21SMike Marshall 		     "Readdir downcall status is %d.  ret:%d\n",
2025db11c21SMike Marshall 		     new_op->downcall.status,
2035db11c21SMike Marshall 		     ret);
2045db11c21SMike Marshall 
2055db11c21SMike Marshall 	if (ret == -EAGAIN && op_state_purged(new_op)) {
2065db11c21SMike Marshall 		/*
2075db11c21SMike Marshall 		 * readdir shared memory aread has been wiped due to
2085db11c21SMike Marshall 		 * pvfs2-client-core restarting, so we must get a new
2095db11c21SMike Marshall 		 * index into the shared memory.
2105db11c21SMike Marshall 		 */
2115db11c21SMike Marshall 		gossip_debug(GOSSIP_DIR_DEBUG,
2125db11c21SMike Marshall 			"%s: Getting new buffer_index for retry of readdir..\n",
2135db11c21SMike Marshall 			 __func__);
2145db11c21SMike Marshall 		readdir_index_put(bufmap, buffer_index);
2155db11c21SMike Marshall 		goto get_new_buffer_index;
2165db11c21SMike Marshall 	}
2175db11c21SMike Marshall 
2185db11c21SMike Marshall 	if (ret == -EIO && op_state_purged(new_op)) {
2195db11c21SMike Marshall 		gossip_err("%s: Client is down. Aborting readdir call.\n",
2205db11c21SMike Marshall 			__func__);
2215db11c21SMike Marshall 		readdir_index_put(bufmap, buffer_index);
2225db11c21SMike Marshall 		goto out_free_op;
2235db11c21SMike Marshall 	}
2245db11c21SMike Marshall 
2255db11c21SMike Marshall 	if (ret < 0 || new_op->downcall.status != 0) {
2265db11c21SMike Marshall 		gossip_debug(GOSSIP_DIR_DEBUG,
2275db11c21SMike Marshall 			     "Readdir request failed.  Status:%d\n",
2285db11c21SMike Marshall 			     new_op->downcall.status);
2295db11c21SMike Marshall 		readdir_index_put(bufmap, buffer_index);
2305db11c21SMike Marshall 		if (ret >= 0)
2315db11c21SMike Marshall 			ret = new_op->downcall.status;
2325db11c21SMike Marshall 		goto out_free_op;
2335db11c21SMike Marshall 	}
2345db11c21SMike Marshall 
2355db11c21SMike Marshall 	bytes_decoded =
2365db11c21SMike Marshall 		readdir_handle_ctor(&rhandle,
2375db11c21SMike Marshall 				    new_op->downcall.trailer_buf,
2388092895fSAl Viro 				    new_op->downcall.trailer_size,
2395db11c21SMike Marshall 				    buffer_index);
2405db11c21SMike Marshall 	if (bytes_decoded < 0) {
2415db11c21SMike Marshall 		gossip_err("pvfs2_readdir: Could not decode trailer buffer into a readdir response %d\n",
2425db11c21SMike Marshall 			ret);
2435db11c21SMike Marshall 		ret = bytes_decoded;
2445db11c21SMike Marshall 		readdir_index_put(bufmap, buffer_index);
2455db11c21SMike Marshall 		goto out_free_op;
2465db11c21SMike Marshall 	}
2475db11c21SMike Marshall 
2485db11c21SMike Marshall 	if (bytes_decoded != new_op->downcall.trailer_size) {
24988309aaeSMike Marshall 		gossip_err("pvfs2_readdir: # bytes decoded (%ld) "
25088309aaeSMike Marshall 			   "!= trailer size (%ld)\n",
2515db11c21SMike Marshall 			   bytes_decoded,
2525db11c21SMike Marshall 			   (long)new_op->downcall.trailer_size);
2535db11c21SMike Marshall 		ret = -EINVAL;
2545db11c21SMike Marshall 		goto out_destroy_handle;
2555db11c21SMike Marshall 	}
2565db11c21SMike Marshall 
25788309aaeSMike Marshall 	/*
25888309aaeSMike Marshall 	 *  pvfs2 doesn't actually store dot and dot-dot, but
25988309aaeSMike Marshall 	 *  we need to have them represented.
26088309aaeSMike Marshall 	 */
2615db11c21SMike Marshall 	if (pos == 0) {
2625db11c21SMike Marshall 		ino = get_ino_from_khandle(dentry->d_inode);
2635db11c21SMike Marshall 		gossip_debug(GOSSIP_DIR_DEBUG,
2645db11c21SMike Marshall 			     "%s: calling dir_emit of \".\" with pos = %llu\n",
2655db11c21SMike Marshall 			     __func__,
2665db11c21SMike Marshall 			     llu(pos));
2675db11c21SMike Marshall 		ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
26888309aaeSMike Marshall 		pos += 1;
2695db11c21SMike Marshall 	}
2705db11c21SMike Marshall 
2715db11c21SMike Marshall 	if (pos == 1) {
2725db11c21SMike Marshall 		ino = get_parent_ino_from_dentry(dentry);
2735db11c21SMike Marshall 		gossip_debug(GOSSIP_DIR_DEBUG,
2745db11c21SMike Marshall 			     "%s: calling dir_emit of \"..\" with pos = %llu\n",
2755db11c21SMike Marshall 			     __func__,
2765db11c21SMike Marshall 			     llu(pos));
2775db11c21SMike Marshall 		ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
27888309aaeSMike Marshall 		pos += 1;
2795db11c21SMike Marshall 	}
2805db11c21SMike Marshall 
28188309aaeSMike Marshall 	/*
28288309aaeSMike Marshall 	 * we stored PVFS_ITERATE_NEXT in ctx->pos last time around
28388309aaeSMike Marshall 	 * to prevent "finding" dot and dot-dot on any iteration
28488309aaeSMike Marshall 	 * other than the first.
28588309aaeSMike Marshall 	 */
28688309aaeSMike Marshall 	if (ctx->pos == PVFS_ITERATE_NEXT)
28788309aaeSMike Marshall 		ctx->pos = 0;
28888309aaeSMike Marshall 
28988309aaeSMike Marshall 	for (i = ctx->pos;
29088309aaeSMike Marshall 	     i < rhandle.readdir_response.pvfs_dirent_outcount;
29188309aaeSMike Marshall 	     i++) {
2925db11c21SMike Marshall 		len = rhandle.readdir_response.dirent_array[i].d_length;
2935db11c21SMike Marshall 		current_entry = rhandle.readdir_response.dirent_array[i].d_name;
2945db11c21SMike Marshall 		current_ino = pvfs2_khandle_to_ino(
2955db11c21SMike Marshall 			&(rhandle.readdir_response.dirent_array[i].khandle));
2965db11c21SMike Marshall 
2975db11c21SMike Marshall 		gossip_debug(GOSSIP_DIR_DEBUG,
29888309aaeSMike Marshall 			     "calling dir_emit for %s with len %d"
29988309aaeSMike Marshall 			     ", ctx->pos %ld\n",
3005db11c21SMike Marshall 			     current_entry,
3015db11c21SMike Marshall 			     len,
30288309aaeSMike Marshall 			     (unsigned long)ctx->pos);
30388309aaeSMike Marshall 		/*
30488309aaeSMike Marshall 		 * type is unknown. We don't return object type
30588309aaeSMike Marshall 		 * in the dirent_array. This leaves getdents
30688309aaeSMike Marshall 		 * clueless about type.
30788309aaeSMike Marshall 		 */
3085db11c21SMike Marshall 		ret =
3095db11c21SMike Marshall 		    dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
31088309aaeSMike Marshall 		if (!ret)
31188309aaeSMike Marshall 			break;
3125db11c21SMike Marshall 		ctx->pos++;
31388309aaeSMike Marshall 		gossip_debug(GOSSIP_DIR_DEBUG,
3145db11c21SMike Marshall 			      "%s: ctx->pos:%lld\n",
3155db11c21SMike Marshall 			      __func__,
3165db11c21SMike Marshall 			      lld(ctx->pos));
3175db11c21SMike Marshall 
3185db11c21SMike Marshall 	}
3195db11c21SMike Marshall 
32088309aaeSMike Marshall 	/*
32188309aaeSMike Marshall 	 * we ran all the way through the last batch, set up for
32288309aaeSMike Marshall 	 * getting another batch...
32388309aaeSMike Marshall 	 */
32488309aaeSMike Marshall 	if (ret) {
3255db11c21SMike Marshall 		*ptoken = rhandle.readdir_response.token;
32688309aaeSMike Marshall 		ctx->pos = PVFS_ITERATE_NEXT;
3275db11c21SMike Marshall 	}
3285db11c21SMike Marshall 
3295db11c21SMike Marshall 	/*
3305db11c21SMike Marshall 	 * Did we hit the end of the directory?
3315db11c21SMike Marshall 	 */
3325db11c21SMike Marshall 	if (rhandle.readdir_response.token == PVFS_READDIR_END &&
3335db11c21SMike Marshall 	    !buffer_full) {
33488309aaeSMike Marshall 		gossip_debug(GOSSIP_DIR_DEBUG,
33588309aaeSMike Marshall 		"End of dir detected; setting ctx->pos to PVFS_READDIR_END.\n");
3365db11c21SMike Marshall 		ctx->pos = PVFS_READDIR_END;
3375db11c21SMike Marshall 	}
3385db11c21SMike Marshall 
3395db11c21SMike Marshall out_destroy_handle:
3405db11c21SMike Marshall 	readdir_handle_dtor(bufmap, &rhandle);
3415db11c21SMike Marshall out_free_op:
3425db11c21SMike Marshall 	op_release(new_op);
3435db11c21SMike Marshall 	gossip_debug(GOSSIP_DIR_DEBUG, "pvfs2_readdir returning %d\n", ret);
3445db11c21SMike Marshall 	return ret;
3455db11c21SMike Marshall }
3465db11c21SMike Marshall 
3475db11c21SMike Marshall static int pvfs2_dir_open(struct inode *inode, struct file *file)
3485db11c21SMike Marshall {
3495db11c21SMike Marshall 	__u64 *ptoken;
3505db11c21SMike Marshall 
3515db11c21SMike Marshall 	file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
3525db11c21SMike Marshall 	if (!file->private_data)
3535db11c21SMike Marshall 		return -ENOMEM;
3545db11c21SMike Marshall 
3555db11c21SMike Marshall 	ptoken = file->private_data;
3565db11c21SMike Marshall 	*ptoken = PVFS_READDIR_START;
3575db11c21SMike Marshall 	return 0;
3585db11c21SMike Marshall }
3595db11c21SMike Marshall 
3605db11c21SMike Marshall static int pvfs2_dir_release(struct inode *inode, struct file *file)
3615db11c21SMike Marshall {
3625db11c21SMike Marshall 	pvfs2_flush_inode(inode);
3635db11c21SMike Marshall 	kfree(file->private_data);
3645db11c21SMike Marshall 	return 0;
3655db11c21SMike Marshall }
3665db11c21SMike Marshall 
3675db11c21SMike Marshall /** PVFS2 implementation of VFS directory operations */
3685db11c21SMike Marshall const struct file_operations pvfs2_dir_operations = {
3695db11c21SMike Marshall 	.read = generic_read_dir,
3705db11c21SMike Marshall 	.iterate = pvfs2_readdir,
3715db11c21SMike Marshall 	.open = pvfs2_dir_open,
3725db11c21SMike Marshall 	.release = pvfs2_dir_release,
3735db11c21SMike Marshall };
374