xref: /openbmc/linux/fs/orangefs/orangefs-utils.c (revision ddc141e5)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * (C) 2001 Clemson University and The University of Chicago
4  *
5  * See COPYING in top-level directory.
6  */
7 #include <linux/kernel.h>
8 #include "protocol.h"
9 #include "orangefs-kernel.h"
10 #include "orangefs-dev-proto.h"
11 #include "orangefs-bufmap.h"
12 
13 __s32 fsid_of_op(struct orangefs_kernel_op_s *op)
14 {
15 	__s32 fsid = ORANGEFS_FS_ID_NULL;
16 
17 	if (op) {
18 		switch (op->upcall.type) {
19 		case ORANGEFS_VFS_OP_FILE_IO:
20 			fsid = op->upcall.req.io.refn.fs_id;
21 			break;
22 		case ORANGEFS_VFS_OP_LOOKUP:
23 			fsid = op->upcall.req.lookup.parent_refn.fs_id;
24 			break;
25 		case ORANGEFS_VFS_OP_CREATE:
26 			fsid = op->upcall.req.create.parent_refn.fs_id;
27 			break;
28 		case ORANGEFS_VFS_OP_GETATTR:
29 			fsid = op->upcall.req.getattr.refn.fs_id;
30 			break;
31 		case ORANGEFS_VFS_OP_REMOVE:
32 			fsid = op->upcall.req.remove.parent_refn.fs_id;
33 			break;
34 		case ORANGEFS_VFS_OP_MKDIR:
35 			fsid = op->upcall.req.mkdir.parent_refn.fs_id;
36 			break;
37 		case ORANGEFS_VFS_OP_READDIR:
38 			fsid = op->upcall.req.readdir.refn.fs_id;
39 			break;
40 		case ORANGEFS_VFS_OP_SETATTR:
41 			fsid = op->upcall.req.setattr.refn.fs_id;
42 			break;
43 		case ORANGEFS_VFS_OP_SYMLINK:
44 			fsid = op->upcall.req.sym.parent_refn.fs_id;
45 			break;
46 		case ORANGEFS_VFS_OP_RENAME:
47 			fsid = op->upcall.req.rename.old_parent_refn.fs_id;
48 			break;
49 		case ORANGEFS_VFS_OP_STATFS:
50 			fsid = op->upcall.req.statfs.fs_id;
51 			break;
52 		case ORANGEFS_VFS_OP_TRUNCATE:
53 			fsid = op->upcall.req.truncate.refn.fs_id;
54 			break;
55 		case ORANGEFS_VFS_OP_RA_FLUSH:
56 			fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
57 			break;
58 		case ORANGEFS_VFS_OP_FS_UMOUNT:
59 			fsid = op->upcall.req.fs_umount.fs_id;
60 			break;
61 		case ORANGEFS_VFS_OP_GETXATTR:
62 			fsid = op->upcall.req.getxattr.refn.fs_id;
63 			break;
64 		case ORANGEFS_VFS_OP_SETXATTR:
65 			fsid = op->upcall.req.setxattr.refn.fs_id;
66 			break;
67 		case ORANGEFS_VFS_OP_LISTXATTR:
68 			fsid = op->upcall.req.listxattr.refn.fs_id;
69 			break;
70 		case ORANGEFS_VFS_OP_REMOVEXATTR:
71 			fsid = op->upcall.req.removexattr.refn.fs_id;
72 			break;
73 		case ORANGEFS_VFS_OP_FSYNC:
74 			fsid = op->upcall.req.fsync.refn.fs_id;
75 			break;
76 		default:
77 			break;
78 		}
79 	}
80 	return fsid;
81 }
82 
83 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
84 {
85 	int flags = 0;
86 	if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
87 		flags |= S_IMMUTABLE;
88 	else
89 		flags &= ~S_IMMUTABLE;
90 	if (attrs->flags & ORANGEFS_APPEND_FL)
91 		flags |= S_APPEND;
92 	else
93 		flags &= ~S_APPEND;
94 	if (attrs->flags & ORANGEFS_NOATIME_FL)
95 		flags |= S_NOATIME;
96 	else
97 		flags &= ~S_NOATIME;
98 	return flags;
99 }
100 
101 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
102 {
103 	int perm_mode = 0;
104 
105 	if (attrs->perms & ORANGEFS_O_EXECUTE)
106 		perm_mode |= S_IXOTH;
107 	if (attrs->perms & ORANGEFS_O_WRITE)
108 		perm_mode |= S_IWOTH;
109 	if (attrs->perms & ORANGEFS_O_READ)
110 		perm_mode |= S_IROTH;
111 
112 	if (attrs->perms & ORANGEFS_G_EXECUTE)
113 		perm_mode |= S_IXGRP;
114 	if (attrs->perms & ORANGEFS_G_WRITE)
115 		perm_mode |= S_IWGRP;
116 	if (attrs->perms & ORANGEFS_G_READ)
117 		perm_mode |= S_IRGRP;
118 
119 	if (attrs->perms & ORANGEFS_U_EXECUTE)
120 		perm_mode |= S_IXUSR;
121 	if (attrs->perms & ORANGEFS_U_WRITE)
122 		perm_mode |= S_IWUSR;
123 	if (attrs->perms & ORANGEFS_U_READ)
124 		perm_mode |= S_IRUSR;
125 
126 	if (attrs->perms & ORANGEFS_G_SGID)
127 		perm_mode |= S_ISGID;
128 	if (attrs->perms & ORANGEFS_U_SUID)
129 		perm_mode |= S_ISUID;
130 
131 	return perm_mode;
132 }
133 
134 /*
135  * NOTE: in kernel land, we never use the sys_attr->link_target for
136  * anything, so don't bother copying it into the sys_attr object here.
137  */
138 static inline int copy_attributes_from_inode(struct inode *inode,
139 					     struct ORANGEFS_sys_attr_s *attrs,
140 					     struct iattr *iattr)
141 {
142 	umode_t tmp_mode;
143 
144 	if (!iattr || !inode || !attrs) {
145 		gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
146 			   "in copy_attributes_from_inode!\n",
147 			   iattr,
148 			   inode,
149 			   attrs);
150 		return -EINVAL;
151 	}
152 	/*
153 	 * We need to be careful to only copy the attributes out of the
154 	 * iattr object that we know are valid.
155 	 */
156 	attrs->mask = 0;
157 	if (iattr->ia_valid & ATTR_UID) {
158 		attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid);
159 		attrs->mask |= ORANGEFS_ATTR_SYS_UID;
160 		gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
161 	}
162 	if (iattr->ia_valid & ATTR_GID) {
163 		attrs->group = from_kgid(&init_user_ns, iattr->ia_gid);
164 		attrs->mask |= ORANGEFS_ATTR_SYS_GID;
165 		gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
166 	}
167 
168 	if (iattr->ia_valid & ATTR_ATIME) {
169 		attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
170 		if (iattr->ia_valid & ATTR_ATIME_SET) {
171 			attrs->atime = (time64_t)iattr->ia_atime.tv_sec;
172 			attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
173 		}
174 	}
175 	if (iattr->ia_valid & ATTR_MTIME) {
176 		attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
177 		if (iattr->ia_valid & ATTR_MTIME_SET) {
178 			attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec;
179 			attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
180 		}
181 	}
182 	if (iattr->ia_valid & ATTR_CTIME)
183 		attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
184 
185 	/*
186 	 * ORANGEFS cannot set size with a setattr operation.  Probably not likely
187 	 * to be requested through the VFS, but just in case, don't worry about
188 	 * ATTR_SIZE
189 	 */
190 
191 	if (iattr->ia_valid & ATTR_MODE) {
192 		tmp_mode = iattr->ia_mode;
193 		if (tmp_mode & (S_ISVTX)) {
194 			if (is_root_handle(inode)) {
195 				/*
196 				 * allow sticky bit to be set on root (since
197 				 * it shows up that way by default anyhow),
198 				 * but don't show it to the server
199 				 */
200 				tmp_mode -= S_ISVTX;
201 			} else {
202 				gossip_debug(GOSSIP_UTILS_DEBUG,
203 					     "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
204 				return -EINVAL;
205 			}
206 		}
207 
208 		if (tmp_mode & (S_ISUID)) {
209 			gossip_debug(GOSSIP_UTILS_DEBUG,
210 				     "Attempting to set setuid bit (not supported); returning EINVAL.\n");
211 			return -EINVAL;
212 		}
213 
214 		attrs->perms = ORANGEFS_util_translate_mode(tmp_mode);
215 		attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
216 	}
217 
218 	return 0;
219 }
220 
221 static int orangefs_inode_type(enum orangefs_ds_type objtype)
222 {
223 	if (objtype == ORANGEFS_TYPE_METAFILE)
224 		return S_IFREG;
225 	else if (objtype == ORANGEFS_TYPE_DIRECTORY)
226 		return S_IFDIR;
227 	else if (objtype == ORANGEFS_TYPE_SYMLINK)
228 		return S_IFLNK;
229 	else
230 		return -1;
231 }
232 
233 static void orangefs_make_bad_inode(struct inode *inode)
234 {
235 	if (is_root_handle(inode)) {
236 		/*
237 		 * if this occurs, the pvfs2-client-core was killed but we
238 		 * can't afford to lose the inode operations and such
239 		 * associated with the root handle in any case.
240 		 */
241 		gossip_debug(GOSSIP_UTILS_DEBUG,
242 			     "*** NOT making bad root inode %pU\n",
243 			     get_khandle_from_ino(inode));
244 	} else {
245 		gossip_debug(GOSSIP_UTILS_DEBUG,
246 			     "*** making bad inode %pU\n",
247 			     get_khandle_from_ino(inode));
248 		make_bad_inode(inode);
249 	}
250 }
251 
252 static int orangefs_inode_is_stale(struct inode *inode,
253     struct ORANGEFS_sys_attr_s *attrs, char *link_target)
254 {
255 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
256 	int type = orangefs_inode_type(attrs->objtype);
257 	/*
258 	 * If the inode type or symlink target have changed then this
259 	 * inode is stale.
260 	 */
261 	if (type == -1 || !(inode->i_mode & type)) {
262 		orangefs_make_bad_inode(inode);
263 		return 1;
264 	}
265 	if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
266 	    link_target, ORANGEFS_NAME_MAX)) {
267 		orangefs_make_bad_inode(inode);
268 		return 1;
269 	}
270 	return 0;
271 }
272 
273 int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
274     u32 request_mask)
275 {
276 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
277 	struct orangefs_kernel_op_s *new_op;
278 	loff_t inode_size, rounded_up_size;
279 	int ret, type;
280 
281 	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
282 	    get_khandle_from_ino(inode));
283 
284 	if (!new && !bypass) {
285 		/*
286 		 * Must have all the attributes in the mask and be within cache
287 		 * time.
288 		 */
289 		if ((request_mask & orangefs_inode->getattr_mask) ==
290 		    request_mask &&
291 		    time_before(jiffies, orangefs_inode->getattr_time))
292 			return 0;
293 	}
294 
295 	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
296 	if (!new_op)
297 		return -ENOMEM;
298 	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
299 	/*
300 	 * Size is the hardest attribute to get.  The incremental cost of any
301 	 * other attribute is essentially zero.
302 	 */
303 	if (request_mask & STATX_SIZE || new)
304 		new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
305 	else
306 		new_op->upcall.req.getattr.mask =
307 		    ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
308 
309 	ret = service_operation(new_op, __func__,
310 	    get_interruptible_flag(inode));
311 	if (ret != 0)
312 		goto out;
313 
314 	if (!new) {
315 		ret = orangefs_inode_is_stale(inode,
316 		    &new_op->downcall.resp.getattr.attributes,
317 		    new_op->downcall.resp.getattr.link_target);
318 		if (ret) {
319 			ret = -ESTALE;
320 			goto out;
321 		}
322 	}
323 
324 	type = orangefs_inode_type(new_op->
325 	    downcall.resp.getattr.attributes.objtype);
326 	switch (type) {
327 	case S_IFREG:
328 		inode->i_flags = orangefs_inode_flags(&new_op->
329 		    downcall.resp.getattr.attributes);
330 		if (request_mask & STATX_SIZE || new) {
331 			inode_size = (loff_t)new_op->
332 			    downcall.resp.getattr.attributes.size;
333 			rounded_up_size =
334 			    (inode_size + (4096 - (inode_size % 4096)));
335 			inode->i_size = inode_size;
336 			orangefs_inode->blksize =
337 			    new_op->downcall.resp.getattr.attributes.blksize;
338 			spin_lock(&inode->i_lock);
339 			inode->i_bytes = inode_size;
340 			inode->i_blocks =
341 			    (unsigned long)(rounded_up_size / 512);
342 			spin_unlock(&inode->i_lock);
343 		}
344 		break;
345 	case S_IFDIR:
346 		if (request_mask & STATX_SIZE || new) {
347 			inode->i_size = PAGE_SIZE;
348 			orangefs_inode->blksize = i_blocksize(inode);
349 			spin_lock(&inode->i_lock);
350 			inode_set_bytes(inode, inode->i_size);
351 			spin_unlock(&inode->i_lock);
352 		}
353 		set_nlink(inode, 1);
354 		break;
355 	case S_IFLNK:
356 		if (new) {
357 			inode->i_size = (loff_t)strlen(new_op->
358 			    downcall.resp.getattr.link_target);
359 			orangefs_inode->blksize = i_blocksize(inode);
360 			ret = strscpy(orangefs_inode->link_target,
361 			    new_op->downcall.resp.getattr.link_target,
362 			    ORANGEFS_NAME_MAX);
363 			if (ret == -E2BIG) {
364 				ret = -EIO;
365 				goto out;
366 			}
367 			inode->i_link = orangefs_inode->link_target;
368 		}
369 		break;
370 	/* i.e. -1 */
371 	default:
372 		/* XXX: ESTALE?  This is what is done if it is not new. */
373 		orangefs_make_bad_inode(inode);
374 		ret = -ESTALE;
375 		goto out;
376 	}
377 
378 	inode->i_uid = make_kuid(&init_user_ns, new_op->
379 	    downcall.resp.getattr.attributes.owner);
380 	inode->i_gid = make_kgid(&init_user_ns, new_op->
381 	    downcall.resp.getattr.attributes.group);
382 	inode->i_atime.tv_sec = (time64_t)new_op->
383 	    downcall.resp.getattr.attributes.atime;
384 	inode->i_mtime.tv_sec = (time64_t)new_op->
385 	    downcall.resp.getattr.attributes.mtime;
386 	inode->i_ctime.tv_sec = (time64_t)new_op->
387 	    downcall.resp.getattr.attributes.ctime;
388 	inode->i_atime.tv_nsec = 0;
389 	inode->i_mtime.tv_nsec = 0;
390 	inode->i_ctime.tv_nsec = 0;
391 
392 	/* special case: mark the root inode as sticky */
393 	inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
394 	    orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
395 
396 	orangefs_inode->getattr_time = jiffies +
397 	    orangefs_getattr_timeout_msecs*HZ/1000;
398 	if (request_mask & STATX_SIZE || new)
399 		orangefs_inode->getattr_mask = STATX_BASIC_STATS;
400 	else
401 		orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE;
402 	ret = 0;
403 out:
404 	op_release(new_op);
405 	return ret;
406 }
407 
408 int orangefs_inode_check_changed(struct inode *inode)
409 {
410 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
411 	struct orangefs_kernel_op_s *new_op;
412 	int ret;
413 
414 	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
415 	    get_khandle_from_ino(inode));
416 
417 	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
418 	if (!new_op)
419 		return -ENOMEM;
420 	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
421 	new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
422 	    ORANGEFS_ATTR_SYS_LNK_TARGET;
423 
424 	ret = service_operation(new_op, __func__,
425 	    get_interruptible_flag(inode));
426 	if (ret != 0)
427 		goto out;
428 
429 	ret = orangefs_inode_is_stale(inode,
430 	    &new_op->downcall.resp.getattr.attributes,
431 	    new_op->downcall.resp.getattr.link_target);
432 out:
433 	op_release(new_op);
434 	return ret;
435 }
436 
437 /*
438  * issues a orangefs setattr request to make sure the new attribute values
439  * take effect if successful.  returns 0 on success; -errno otherwise
440  */
441 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
442 {
443 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
444 	struct orangefs_kernel_op_s *new_op;
445 	int ret;
446 
447 	new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
448 	if (!new_op)
449 		return -ENOMEM;
450 
451 	new_op->upcall.req.setattr.refn = orangefs_inode->refn;
452 	ret = copy_attributes_from_inode(inode,
453 		       &new_op->upcall.req.setattr.attributes,
454 		       iattr);
455 	if (ret >= 0) {
456 		ret = service_operation(new_op, __func__,
457 				get_interruptible_flag(inode));
458 
459 		gossip_debug(GOSSIP_UTILS_DEBUG,
460 			     "orangefs_inode_setattr: returning %d\n",
461 			     ret);
462 	}
463 
464 	op_release(new_op);
465 
466 	if (ret == 0)
467 		orangefs_inode->getattr_time = jiffies - 1;
468 
469 	return ret;
470 }
471 
472 /*
473  * The following is a very dirty hack that is now a permanent part of the
474  * ORANGEFS protocol. See protocol.h for more error definitions.
475  */
476 
477 /* The order matches include/orangefs-types.h in the OrangeFS source. */
478 static int PINT_errno_mapping[] = {
479 	0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
480 	EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
481 	EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
482 	ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
483 	EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
484 	EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
485 	ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
486 	EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
487 	ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
488 	EACCES, ECONNRESET, ERANGE
489 };
490 
491 int orangefs_normalize_to_errno(__s32 error_code)
492 {
493 	__u32 i;
494 
495 	/* Success */
496 	if (error_code == 0) {
497 		return 0;
498 	/*
499 	 * This shouldn't ever happen. If it does it should be fixed on the
500 	 * server.
501 	 */
502 	} else if (error_code > 0) {
503 		gossip_err("orangefs: error status receieved.\n");
504 		gossip_err("orangefs: assuming error code is inverted.\n");
505 		error_code = -error_code;
506 	}
507 
508 	/*
509 	 * XXX: This is very bad since error codes from ORANGEFS may not be
510 	 * suitable for return into userspace.
511 	 */
512 
513 	/*
514 	 * Convert ORANGEFS error values into errno values suitable for return
515 	 * from the kernel.
516 	 */
517 	if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
518 		if (((-error_code) &
519 		    (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
520 		    ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
521 			/*
522 			 * cancellation error codes generally correspond to
523 			 * a timeout from the client's perspective
524 			 */
525 			error_code = -ETIMEDOUT;
526 		} else {
527 			/* assume a default error code */
528 			gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code);
529 			error_code = -EINVAL;
530 		}
531 
532 	/* Convert ORANGEFS encoded errno values into regular errno values. */
533 	} else if ((-error_code) & ORANGEFS_ERROR_BIT) {
534 		i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
535 		if (i < ARRAY_SIZE(PINT_errno_mapping))
536 			error_code = -PINT_errno_mapping[i];
537 		else
538 			error_code = -EINVAL;
539 
540 	/*
541 	 * Only ORANGEFS protocol error codes should ever come here. Otherwise
542 	 * there is a bug somewhere.
543 	 */
544 	} else {
545 		gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n");
546 		error_code = -EINVAL;
547 	}
548 	return error_code;
549 }
550 
551 #define NUM_MODES 11
552 __s32 ORANGEFS_util_translate_mode(int mode)
553 {
554 	int ret = 0;
555 	int i = 0;
556 	static int modes[NUM_MODES] = {
557 		S_IXOTH, S_IWOTH, S_IROTH,
558 		S_IXGRP, S_IWGRP, S_IRGRP,
559 		S_IXUSR, S_IWUSR, S_IRUSR,
560 		S_ISGID, S_ISUID
561 	};
562 	static int orangefs_modes[NUM_MODES] = {
563 		ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
564 		ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
565 		ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
566 		ORANGEFS_G_SGID, ORANGEFS_U_SUID
567 	};
568 
569 	for (i = 0; i < NUM_MODES; i++)
570 		if (mode & modes[i])
571 			ret |= orangefs_modes[i];
572 
573 	return ret;
574 }
575 #undef NUM_MODES
576