xref: /openbmc/linux/fs/orangefs/orangefs-utils.c (revision 9fa996c5f003beae0d8ca323caf06a2b73e471ec)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * (C) 2001 Clemson University and The University of Chicago
4  * Copyright 2018 Omnibond Systems, L.L.C.
5  *
6  * See COPYING in top-level directory.
7  */
8 #include <linux/kernel.h>
9 #include "protocol.h"
10 #include "orangefs-kernel.h"
11 #include "orangefs-dev-proto.h"
12 #include "orangefs-bufmap.h"
13 
14 __s32 fsid_of_op(struct orangefs_kernel_op_s *op)
15 {
16 	__s32 fsid = ORANGEFS_FS_ID_NULL;
17 
18 	if (op) {
19 		switch (op->upcall.type) {
20 		case ORANGEFS_VFS_OP_FILE_IO:
21 			fsid = op->upcall.req.io.refn.fs_id;
22 			break;
23 		case ORANGEFS_VFS_OP_LOOKUP:
24 			fsid = op->upcall.req.lookup.parent_refn.fs_id;
25 			break;
26 		case ORANGEFS_VFS_OP_CREATE:
27 			fsid = op->upcall.req.create.parent_refn.fs_id;
28 			break;
29 		case ORANGEFS_VFS_OP_GETATTR:
30 			fsid = op->upcall.req.getattr.refn.fs_id;
31 			break;
32 		case ORANGEFS_VFS_OP_REMOVE:
33 			fsid = op->upcall.req.remove.parent_refn.fs_id;
34 			break;
35 		case ORANGEFS_VFS_OP_MKDIR:
36 			fsid = op->upcall.req.mkdir.parent_refn.fs_id;
37 			break;
38 		case ORANGEFS_VFS_OP_READDIR:
39 			fsid = op->upcall.req.readdir.refn.fs_id;
40 			break;
41 		case ORANGEFS_VFS_OP_SETATTR:
42 			fsid = op->upcall.req.setattr.refn.fs_id;
43 			break;
44 		case ORANGEFS_VFS_OP_SYMLINK:
45 			fsid = op->upcall.req.sym.parent_refn.fs_id;
46 			break;
47 		case ORANGEFS_VFS_OP_RENAME:
48 			fsid = op->upcall.req.rename.old_parent_refn.fs_id;
49 			break;
50 		case ORANGEFS_VFS_OP_STATFS:
51 			fsid = op->upcall.req.statfs.fs_id;
52 			break;
53 		case ORANGEFS_VFS_OP_TRUNCATE:
54 			fsid = op->upcall.req.truncate.refn.fs_id;
55 			break;
56 		case ORANGEFS_VFS_OP_RA_FLUSH:
57 			fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
58 			break;
59 		case ORANGEFS_VFS_OP_FS_UMOUNT:
60 			fsid = op->upcall.req.fs_umount.fs_id;
61 			break;
62 		case ORANGEFS_VFS_OP_GETXATTR:
63 			fsid = op->upcall.req.getxattr.refn.fs_id;
64 			break;
65 		case ORANGEFS_VFS_OP_SETXATTR:
66 			fsid = op->upcall.req.setxattr.refn.fs_id;
67 			break;
68 		case ORANGEFS_VFS_OP_LISTXATTR:
69 			fsid = op->upcall.req.listxattr.refn.fs_id;
70 			break;
71 		case ORANGEFS_VFS_OP_REMOVEXATTR:
72 			fsid = op->upcall.req.removexattr.refn.fs_id;
73 			break;
74 		case ORANGEFS_VFS_OP_FSYNC:
75 			fsid = op->upcall.req.fsync.refn.fs_id;
76 			break;
77 		default:
78 			break;
79 		}
80 	}
81 	return fsid;
82 }
83 
84 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
85 {
86 	int flags = 0;
87 	if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
88 		flags |= S_IMMUTABLE;
89 	else
90 		flags &= ~S_IMMUTABLE;
91 	if (attrs->flags & ORANGEFS_APPEND_FL)
92 		flags |= S_APPEND;
93 	else
94 		flags &= ~S_APPEND;
95 	if (attrs->flags & ORANGEFS_NOATIME_FL)
96 		flags |= S_NOATIME;
97 	else
98 		flags &= ~S_NOATIME;
99 	return flags;
100 }
101 
102 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
103 {
104 	int perm_mode = 0;
105 
106 	if (attrs->perms & ORANGEFS_O_EXECUTE)
107 		perm_mode |= S_IXOTH;
108 	if (attrs->perms & ORANGEFS_O_WRITE)
109 		perm_mode |= S_IWOTH;
110 	if (attrs->perms & ORANGEFS_O_READ)
111 		perm_mode |= S_IROTH;
112 
113 	if (attrs->perms & ORANGEFS_G_EXECUTE)
114 		perm_mode |= S_IXGRP;
115 	if (attrs->perms & ORANGEFS_G_WRITE)
116 		perm_mode |= S_IWGRP;
117 	if (attrs->perms & ORANGEFS_G_READ)
118 		perm_mode |= S_IRGRP;
119 
120 	if (attrs->perms & ORANGEFS_U_EXECUTE)
121 		perm_mode |= S_IXUSR;
122 	if (attrs->perms & ORANGEFS_U_WRITE)
123 		perm_mode |= S_IWUSR;
124 	if (attrs->perms & ORANGEFS_U_READ)
125 		perm_mode |= S_IRUSR;
126 
127 	if (attrs->perms & ORANGEFS_G_SGID)
128 		perm_mode |= S_ISGID;
129 	if (attrs->perms & ORANGEFS_U_SUID)
130 		perm_mode |= S_ISUID;
131 
132 	return perm_mode;
133 }
134 
135 /*
136  * NOTE: in kernel land, we never use the sys_attr->link_target for
137  * anything, so don't bother copying it into the sys_attr object here.
138  */
139 static inline void copy_attributes_from_inode(struct inode *inode,
140     struct ORANGEFS_sys_attr_s *attrs)
141 {
142 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
143 	attrs->mask = 0;
144 	if (orangefs_inode->attr_valid & ATTR_UID) {
145 		attrs->owner = from_kuid(&init_user_ns, inode->i_uid);
146 		attrs->mask |= ORANGEFS_ATTR_SYS_UID;
147 		gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
148 	}
149 	if (orangefs_inode->attr_valid & ATTR_GID) {
150 		attrs->group = from_kgid(&init_user_ns, inode->i_gid);
151 		attrs->mask |= ORANGEFS_ATTR_SYS_GID;
152 		gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
153 	}
154 
155 	if (orangefs_inode->attr_valid & ATTR_ATIME) {
156 		attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
157 		if (orangefs_inode->attr_valid & ATTR_ATIME_SET) {
158 			attrs->atime = (time64_t)inode->i_atime.tv_sec;
159 			attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
160 		}
161 	}
162 	if (orangefs_inode->attr_valid & ATTR_MTIME) {
163 		attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
164 		if (orangefs_inode->attr_valid & ATTR_MTIME_SET) {
165 			attrs->mtime = (time64_t)inode->i_mtime.tv_sec;
166 			attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
167 		}
168 	}
169 	if (orangefs_inode->attr_valid & ATTR_CTIME)
170 		attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
171 
172 	/*
173 	 * ORANGEFS cannot set size with a setattr operation. Probably not
174 	 * likely to be requested through the VFS, but just in case, don't
175 	 * worry about ATTR_SIZE
176 	 */
177 
178 	if (orangefs_inode->attr_valid & ATTR_MODE) {
179 		attrs->perms = ORANGEFS_util_translate_mode(inode->i_mode);
180 		attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
181 	}
182 }
183 
184 static int orangefs_inode_type(enum orangefs_ds_type objtype)
185 {
186 	if (objtype == ORANGEFS_TYPE_METAFILE)
187 		return S_IFREG;
188 	else if (objtype == ORANGEFS_TYPE_DIRECTORY)
189 		return S_IFDIR;
190 	else if (objtype == ORANGEFS_TYPE_SYMLINK)
191 		return S_IFLNK;
192 	else
193 		return -1;
194 }
195 
196 static void orangefs_make_bad_inode(struct inode *inode)
197 {
198 	if (is_root_handle(inode)) {
199 		/*
200 		 * if this occurs, the pvfs2-client-core was killed but we
201 		 * can't afford to lose the inode operations and such
202 		 * associated with the root handle in any case.
203 		 */
204 		gossip_debug(GOSSIP_UTILS_DEBUG,
205 			     "*** NOT making bad root inode %pU\n",
206 			     get_khandle_from_ino(inode));
207 	} else {
208 		gossip_debug(GOSSIP_UTILS_DEBUG,
209 			     "*** making bad inode %pU\n",
210 			     get_khandle_from_ino(inode));
211 		make_bad_inode(inode);
212 	}
213 }
214 
215 static int orangefs_inode_is_stale(struct inode *inode,
216     struct ORANGEFS_sys_attr_s *attrs, char *link_target)
217 {
218 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
219 	int type = orangefs_inode_type(attrs->objtype);
220 	/*
221 	 * If the inode type or symlink target have changed then this
222 	 * inode is stale.
223 	 */
224 	if (type == -1 || inode_wrong_type(inode, type)) {
225 		orangefs_make_bad_inode(inode);
226 		return 1;
227 	}
228 	if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
229 	    link_target, ORANGEFS_NAME_MAX)) {
230 		orangefs_make_bad_inode(inode);
231 		return 1;
232 	}
233 	return 0;
234 }
235 
236 int orangefs_inode_getattr(struct inode *inode, int flags)
237 {
238 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
239 	struct orangefs_kernel_op_s *new_op;
240 	loff_t inode_size;
241 	int ret, type;
242 
243 	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU flags %d\n",
244 	    __func__, get_khandle_from_ino(inode), flags);
245 
246 again:
247 	spin_lock(&inode->i_lock);
248 	/* Must have all the attributes in the mask and be within cache time. */
249 	if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) ||
250 	    orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) {
251 		if (orangefs_inode->attr_valid) {
252 			spin_unlock(&inode->i_lock);
253 			write_inode_now(inode, 1);
254 			goto again;
255 		}
256 		spin_unlock(&inode->i_lock);
257 		return 0;
258 	}
259 	spin_unlock(&inode->i_lock);
260 
261 	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
262 	if (!new_op)
263 		return -ENOMEM;
264 	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
265 	/*
266 	 * Size is the hardest attribute to get.  The incremental cost of any
267 	 * other attribute is essentially zero.
268 	 */
269 	if (flags)
270 		new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
271 	else
272 		new_op->upcall.req.getattr.mask =
273 		    ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
274 
275 	ret = service_operation(new_op, __func__,
276 	    get_interruptible_flag(inode));
277 	if (ret != 0)
278 		goto out;
279 
280 again2:
281 	spin_lock(&inode->i_lock);
282 	/* Must have all the attributes in the mask and be within cache time. */
283 	if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) ||
284 	    orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) {
285 		if (orangefs_inode->attr_valid) {
286 			spin_unlock(&inode->i_lock);
287 			write_inode_now(inode, 1);
288 			goto again2;
289 		}
290 		if (inode->i_state & I_DIRTY_PAGES) {
291 			ret = 0;
292 			goto out_unlock;
293 		}
294 		gossip_debug(GOSSIP_UTILS_DEBUG, "%s: in cache or dirty\n",
295 		    __func__);
296 		ret = 0;
297 		goto out_unlock;
298 	}
299 
300 	if (!(flags & ORANGEFS_GETATTR_NEW)) {
301 		ret = orangefs_inode_is_stale(inode,
302 		    &new_op->downcall.resp.getattr.attributes,
303 		    new_op->downcall.resp.getattr.link_target);
304 		if (ret) {
305 			ret = -ESTALE;
306 			goto out_unlock;
307 		}
308 	}
309 
310 	type = orangefs_inode_type(new_op->
311 	    downcall.resp.getattr.attributes.objtype);
312 	switch (type) {
313 	case S_IFREG:
314 		inode->i_flags = orangefs_inode_flags(&new_op->
315 		    downcall.resp.getattr.attributes);
316 		if (flags) {
317 			inode_size = (loff_t)new_op->
318 			    downcall.resp.getattr.attributes.size;
319 			inode->i_size = inode_size;
320 			inode->i_blkbits = ffs(new_op->downcall.resp.getattr.
321 			    attributes.blksize);
322 			inode->i_bytes = inode_size;
323 			inode->i_blocks =
324 			    (inode_size + 512 - inode_size % 512)/512;
325 		}
326 		break;
327 	case S_IFDIR:
328 		if (flags) {
329 			inode->i_size = PAGE_SIZE;
330 			inode_set_bytes(inode, inode->i_size);
331 		}
332 		set_nlink(inode, 1);
333 		break;
334 	case S_IFLNK:
335 		if (flags & ORANGEFS_GETATTR_NEW) {
336 			inode->i_size = (loff_t)strlen(new_op->
337 			    downcall.resp.getattr.link_target);
338 			ret = strscpy(orangefs_inode->link_target,
339 			    new_op->downcall.resp.getattr.link_target,
340 			    ORANGEFS_NAME_MAX);
341 			if (ret == -E2BIG) {
342 				ret = -EIO;
343 				goto out_unlock;
344 			}
345 			inode->i_link = orangefs_inode->link_target;
346 		}
347 		break;
348 	/* i.e. -1 */
349 	default:
350 		/* XXX: ESTALE?  This is what is done if it is not new. */
351 		orangefs_make_bad_inode(inode);
352 		ret = -ESTALE;
353 		goto out_unlock;
354 	}
355 
356 	inode->i_uid = make_kuid(&init_user_ns, new_op->
357 	    downcall.resp.getattr.attributes.owner);
358 	inode->i_gid = make_kgid(&init_user_ns, new_op->
359 	    downcall.resp.getattr.attributes.group);
360 	inode->i_atime.tv_sec = (time64_t)new_op->
361 	    downcall.resp.getattr.attributes.atime;
362 	inode->i_mtime.tv_sec = (time64_t)new_op->
363 	    downcall.resp.getattr.attributes.mtime;
364 	inode->i_ctime.tv_sec = (time64_t)new_op->
365 	    downcall.resp.getattr.attributes.ctime;
366 	inode->i_atime.tv_nsec = 0;
367 	inode->i_mtime.tv_nsec = 0;
368 	inode->i_ctime.tv_nsec = 0;
369 
370 	/* special case: mark the root inode as sticky */
371 	inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
372 	    orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
373 
374 	orangefs_inode->getattr_time = jiffies +
375 	    orangefs_getattr_timeout_msecs*HZ/1000;
376 	ret = 0;
377 out_unlock:
378 	spin_unlock(&inode->i_lock);
379 out:
380 	op_release(new_op);
381 	return ret;
382 }
383 
384 int orangefs_inode_check_changed(struct inode *inode)
385 {
386 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
387 	struct orangefs_kernel_op_s *new_op;
388 	int ret;
389 
390 	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
391 	    get_khandle_from_ino(inode));
392 
393 	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
394 	if (!new_op)
395 		return -ENOMEM;
396 	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
397 	new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
398 	    ORANGEFS_ATTR_SYS_LNK_TARGET;
399 
400 	ret = service_operation(new_op, __func__,
401 	    get_interruptible_flag(inode));
402 	if (ret != 0)
403 		goto out;
404 
405 	ret = orangefs_inode_is_stale(inode,
406 	    &new_op->downcall.resp.getattr.attributes,
407 	    new_op->downcall.resp.getattr.link_target);
408 out:
409 	op_release(new_op);
410 	return ret;
411 }
412 
413 /*
414  * issues a orangefs setattr request to make sure the new attribute values
415  * take effect if successful.  returns 0 on success; -errno otherwise
416  */
417 int orangefs_inode_setattr(struct inode *inode)
418 {
419 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
420 	struct orangefs_kernel_op_s *new_op;
421 	int ret;
422 
423 	new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
424 	if (!new_op)
425 		return -ENOMEM;
426 
427 	spin_lock(&inode->i_lock);
428 	new_op->upcall.uid = from_kuid(&init_user_ns, orangefs_inode->attr_uid);
429 	new_op->upcall.gid = from_kgid(&init_user_ns, orangefs_inode->attr_gid);
430 	new_op->upcall.req.setattr.refn = orangefs_inode->refn;
431 	copy_attributes_from_inode(inode,
432 	    &new_op->upcall.req.setattr.attributes);
433 	orangefs_inode->attr_valid = 0;
434 	if (!new_op->upcall.req.setattr.attributes.mask) {
435 		spin_unlock(&inode->i_lock);
436 		op_release(new_op);
437 		return 0;
438 	}
439 	spin_unlock(&inode->i_lock);
440 
441 	ret = service_operation(new_op, __func__,
442 	    get_interruptible_flag(inode) | ORANGEFS_OP_WRITEBACK);
443 	gossip_debug(GOSSIP_UTILS_DEBUG,
444 	    "orangefs_inode_setattr: returning %d\n", ret);
445 	if (ret)
446 		orangefs_make_bad_inode(inode);
447 
448 	op_release(new_op);
449 
450 	if (ret == 0)
451 		orangefs_inode->getattr_time = jiffies - 1;
452 	return ret;
453 }
454 
455 /*
456  * The following is a very dirty hack that is now a permanent part of the
457  * ORANGEFS protocol. See protocol.h for more error definitions.
458  */
459 
/*
 * Table of errno values indexed by the ORANGEFS error number.
 * The order matches include/orangefs-types.h in the OrangeFS source.
 * The table is only ever read, hence const.
 */
static const int PINT_errno_mapping[] = {
	0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
	EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
	EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
	ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
	EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
	EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
	ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
	EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
	ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
	EACCES, ECONNRESET, ERANGE
};
473 
474 int orangefs_normalize_to_errno(__s32 error_code)
475 {
476 	__u32 i;
477 
478 	/* Success */
479 	if (error_code == 0) {
480 		return 0;
481 	/*
482 	 * This shouldn't ever happen. If it does it should be fixed on the
483 	 * server.
484 	 */
485 	} else if (error_code > 0) {
486 		gossip_err("orangefs: error status received.\n");
487 		gossip_err("orangefs: assuming error code is inverted.\n");
488 		error_code = -error_code;
489 	}
490 
491 	/*
492 	 * XXX: This is very bad since error codes from ORANGEFS may not be
493 	 * suitable for return into userspace.
494 	 */
495 
496 	/*
497 	 * Convert ORANGEFS error values into errno values suitable for return
498 	 * from the kernel.
499 	 */
500 	if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
501 		if (((-error_code) &
502 		    (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
503 		    ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
504 			/*
505 			 * cancellation error codes generally correspond to
506 			 * a timeout from the client's perspective
507 			 */
508 			error_code = -ETIMEDOUT;
509 		} else {
510 			/* assume a default error code */
511 			gossip_err("%s: bad error code :%d:.\n",
512 				__func__,
513 				error_code);
514 			error_code = -EINVAL;
515 		}
516 
517 	/* Convert ORANGEFS encoded errno values into regular errno values. */
518 	} else if ((-error_code) & ORANGEFS_ERROR_BIT) {
519 		i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
520 		if (i < ARRAY_SIZE(PINT_errno_mapping))
521 			error_code = -PINT_errno_mapping[i];
522 		else
523 			error_code = -EINVAL;
524 
525 	/*
526 	 * Only ORANGEFS protocol error codes should ever come here. Otherwise
527 	 * there is a bug somewhere.
528 	 */
529 	} else {
530 		gossip_err("%s: unknown error code.\n", __func__);
531 		error_code = -EINVAL;
532 	}
533 	return error_code;
534 }
535 
536 #define NUM_MODES 11
537 __s32 ORANGEFS_util_translate_mode(int mode)
538 {
539 	int ret = 0;
540 	int i = 0;
541 	static int modes[NUM_MODES] = {
542 		S_IXOTH, S_IWOTH, S_IROTH,
543 		S_IXGRP, S_IWGRP, S_IRGRP,
544 		S_IXUSR, S_IWUSR, S_IRUSR,
545 		S_ISGID, S_ISUID
546 	};
547 	static int orangefs_modes[NUM_MODES] = {
548 		ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
549 		ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
550 		ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
551 		ORANGEFS_G_SGID, ORANGEFS_U_SUID
552 	};
553 
554 	for (i = 0; i < NUM_MODES; i++)
555 		if (mode & modes[i])
556 			ret |= orangefs_modes[i];
557 
558 	return ret;
559 }
560 #undef NUM_MODES
561