xref: /openbmc/linux/fs/orangefs/orangefs-utils.c (revision 68198dca)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * (C) 2001 Clemson University and The University of Chicago
4  *
5  * See COPYING in top-level directory.
6  */
7 #include <linux/kernel.h>
8 #include "protocol.h"
9 #include "orangefs-kernel.h"
10 #include "orangefs-dev-proto.h"
11 #include "orangefs-bufmap.h"
12 
13 __s32 fsid_of_op(struct orangefs_kernel_op_s *op)
14 {
15 	__s32 fsid = ORANGEFS_FS_ID_NULL;
16 
17 	if (op) {
18 		switch (op->upcall.type) {
19 		case ORANGEFS_VFS_OP_FILE_IO:
20 			fsid = op->upcall.req.io.refn.fs_id;
21 			break;
22 		case ORANGEFS_VFS_OP_LOOKUP:
23 			fsid = op->upcall.req.lookup.parent_refn.fs_id;
24 			break;
25 		case ORANGEFS_VFS_OP_CREATE:
26 			fsid = op->upcall.req.create.parent_refn.fs_id;
27 			break;
28 		case ORANGEFS_VFS_OP_GETATTR:
29 			fsid = op->upcall.req.getattr.refn.fs_id;
30 			break;
31 		case ORANGEFS_VFS_OP_REMOVE:
32 			fsid = op->upcall.req.remove.parent_refn.fs_id;
33 			break;
34 		case ORANGEFS_VFS_OP_MKDIR:
35 			fsid = op->upcall.req.mkdir.parent_refn.fs_id;
36 			break;
37 		case ORANGEFS_VFS_OP_READDIR:
38 			fsid = op->upcall.req.readdir.refn.fs_id;
39 			break;
40 		case ORANGEFS_VFS_OP_SETATTR:
41 			fsid = op->upcall.req.setattr.refn.fs_id;
42 			break;
43 		case ORANGEFS_VFS_OP_SYMLINK:
44 			fsid = op->upcall.req.sym.parent_refn.fs_id;
45 			break;
46 		case ORANGEFS_VFS_OP_RENAME:
47 			fsid = op->upcall.req.rename.old_parent_refn.fs_id;
48 			break;
49 		case ORANGEFS_VFS_OP_STATFS:
50 			fsid = op->upcall.req.statfs.fs_id;
51 			break;
52 		case ORANGEFS_VFS_OP_TRUNCATE:
53 			fsid = op->upcall.req.truncate.refn.fs_id;
54 			break;
55 		case ORANGEFS_VFS_OP_RA_FLUSH:
56 			fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
57 			break;
58 		case ORANGEFS_VFS_OP_FS_UMOUNT:
59 			fsid = op->upcall.req.fs_umount.fs_id;
60 			break;
61 		case ORANGEFS_VFS_OP_GETXATTR:
62 			fsid = op->upcall.req.getxattr.refn.fs_id;
63 			break;
64 		case ORANGEFS_VFS_OP_SETXATTR:
65 			fsid = op->upcall.req.setxattr.refn.fs_id;
66 			break;
67 		case ORANGEFS_VFS_OP_LISTXATTR:
68 			fsid = op->upcall.req.listxattr.refn.fs_id;
69 			break;
70 		case ORANGEFS_VFS_OP_REMOVEXATTR:
71 			fsid = op->upcall.req.removexattr.refn.fs_id;
72 			break;
73 		case ORANGEFS_VFS_OP_FSYNC:
74 			fsid = op->upcall.req.fsync.refn.fs_id;
75 			break;
76 		default:
77 			break;
78 		}
79 	}
80 	return fsid;
81 }
82 
83 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
84 {
85 	int flags = 0;
86 	if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
87 		flags |= S_IMMUTABLE;
88 	else
89 		flags &= ~S_IMMUTABLE;
90 	if (attrs->flags & ORANGEFS_APPEND_FL)
91 		flags |= S_APPEND;
92 	else
93 		flags &= ~S_APPEND;
94 	if (attrs->flags & ORANGEFS_NOATIME_FL)
95 		flags |= S_NOATIME;
96 	else
97 		flags &= ~S_NOATIME;
98 	return flags;
99 }
100 
101 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
102 {
103 	int perm_mode = 0;
104 
105 	if (attrs->perms & ORANGEFS_O_EXECUTE)
106 		perm_mode |= S_IXOTH;
107 	if (attrs->perms & ORANGEFS_O_WRITE)
108 		perm_mode |= S_IWOTH;
109 	if (attrs->perms & ORANGEFS_O_READ)
110 		perm_mode |= S_IROTH;
111 
112 	if (attrs->perms & ORANGEFS_G_EXECUTE)
113 		perm_mode |= S_IXGRP;
114 	if (attrs->perms & ORANGEFS_G_WRITE)
115 		perm_mode |= S_IWGRP;
116 	if (attrs->perms & ORANGEFS_G_READ)
117 		perm_mode |= S_IRGRP;
118 
119 	if (attrs->perms & ORANGEFS_U_EXECUTE)
120 		perm_mode |= S_IXUSR;
121 	if (attrs->perms & ORANGEFS_U_WRITE)
122 		perm_mode |= S_IWUSR;
123 	if (attrs->perms & ORANGEFS_U_READ)
124 		perm_mode |= S_IRUSR;
125 
126 	if (attrs->perms & ORANGEFS_G_SGID)
127 		perm_mode |= S_ISGID;
128 	if (attrs->perms & ORANGEFS_U_SUID)
129 		perm_mode |= S_ISUID;
130 
131 	return perm_mode;
132 }
133 
134 /*
135  * NOTE: in kernel land, we never use the sys_attr->link_target for
136  * anything, so don't bother copying it into the sys_attr object here.
137  */
138 static inline int copy_attributes_from_inode(struct inode *inode,
139 					     struct ORANGEFS_sys_attr_s *attrs,
140 					     struct iattr *iattr)
141 {
142 	umode_t tmp_mode;
143 
144 	if (!iattr || !inode || !attrs) {
145 		gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
146 			   "in copy_attributes_from_inode!\n",
147 			   iattr,
148 			   inode,
149 			   attrs);
150 		return -EINVAL;
151 	}
152 	/*
153 	 * We need to be careful to only copy the attributes out of the
154 	 * iattr object that we know are valid.
155 	 */
156 	attrs->mask = 0;
157 	if (iattr->ia_valid & ATTR_UID) {
158 		attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid);
159 		attrs->mask |= ORANGEFS_ATTR_SYS_UID;
160 		gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
161 	}
162 	if (iattr->ia_valid & ATTR_GID) {
163 		attrs->group = from_kgid(&init_user_ns, iattr->ia_gid);
164 		attrs->mask |= ORANGEFS_ATTR_SYS_GID;
165 		gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
166 	}
167 
168 	if (iattr->ia_valid & ATTR_ATIME) {
169 		attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
170 		if (iattr->ia_valid & ATTR_ATIME_SET) {
171 			attrs->atime = (time64_t)iattr->ia_atime.tv_sec;
172 			attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
173 		}
174 	}
175 	if (iattr->ia_valid & ATTR_MTIME) {
176 		attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
177 		if (iattr->ia_valid & ATTR_MTIME_SET) {
178 			attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec;
179 			attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
180 		}
181 	}
182 	if (iattr->ia_valid & ATTR_CTIME)
183 		attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
184 
185 	/*
186 	 * ORANGEFS cannot set size with a setattr operation.  Probably not likely
187 	 * to be requested through the VFS, but just in case, don't worry about
188 	 * ATTR_SIZE
189 	 */
190 
191 	if (iattr->ia_valid & ATTR_MODE) {
192 		tmp_mode = iattr->ia_mode;
193 		if (tmp_mode & (S_ISVTX)) {
194 			if (is_root_handle(inode)) {
195 				/*
196 				 * allow sticky bit to be set on root (since
197 				 * it shows up that way by default anyhow),
198 				 * but don't show it to the server
199 				 */
200 				tmp_mode -= S_ISVTX;
201 			} else {
202 				gossip_debug(GOSSIP_UTILS_DEBUG,
203 					     "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
204 				return -EINVAL;
205 			}
206 		}
207 
208 		if (tmp_mode & (S_ISUID)) {
209 			gossip_debug(GOSSIP_UTILS_DEBUG,
210 				     "Attempting to set setuid bit (not supported); returning EINVAL.\n");
211 			return -EINVAL;
212 		}
213 
214 		attrs->perms = ORANGEFS_util_translate_mode(tmp_mode);
215 		attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
216 	}
217 
218 	return 0;
219 }
220 
221 static int orangefs_inode_type(enum orangefs_ds_type objtype)
222 {
223 	if (objtype == ORANGEFS_TYPE_METAFILE)
224 		return S_IFREG;
225 	else if (objtype == ORANGEFS_TYPE_DIRECTORY)
226 		return S_IFDIR;
227 	else if (objtype == ORANGEFS_TYPE_SYMLINK)
228 		return S_IFLNK;
229 	else
230 		return -1;
231 }
232 
233 static int orangefs_inode_is_stale(struct inode *inode, int new,
234     struct ORANGEFS_sys_attr_s *attrs, char *link_target)
235 {
236 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
237 	int type = orangefs_inode_type(attrs->objtype);
238 	if (!new) {
239 		/*
240 		 * If the inode type or symlink target have changed then this
241 		 * inode is stale.
242 		 */
243 		if (type == -1 || !(inode->i_mode & type)) {
244 			orangefs_make_bad_inode(inode);
245 			return 1;
246 		}
247 		if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
248 		    link_target, ORANGEFS_NAME_MAX)) {
249 			orangefs_make_bad_inode(inode);
250 			return 1;
251 		}
252 	}
253 	return 0;
254 }
255 
256 int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
257     u32 request_mask)
258 {
259 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
260 	struct orangefs_kernel_op_s *new_op;
261 	loff_t inode_size, rounded_up_size;
262 	int ret, type;
263 
264 	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
265 	    get_khandle_from_ino(inode));
266 
267 	if (!new && !bypass) {
268 		/*
269 		 * Must have all the attributes in the mask and be within cache
270 		 * time.
271 		 */
272 		if ((request_mask & orangefs_inode->getattr_mask) ==
273 		    request_mask &&
274 		    time_before(jiffies, orangefs_inode->getattr_time))
275 			return 0;
276 	}
277 
278 	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
279 	if (!new_op)
280 		return -ENOMEM;
281 	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
282 	/*
283 	 * Size is the hardest attribute to get.  The incremental cost of any
284 	 * other attribute is essentially zero.
285 	 */
286 	if (request_mask & STATX_SIZE || new)
287 		new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
288 	else
289 		new_op->upcall.req.getattr.mask =
290 		    ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
291 
292 	ret = service_operation(new_op, __func__,
293 	    get_interruptible_flag(inode));
294 	if (ret != 0)
295 		goto out;
296 
297 	type = orangefs_inode_type(new_op->
298 	    downcall.resp.getattr.attributes.objtype);
299 	ret = orangefs_inode_is_stale(inode, new,
300 	    &new_op->downcall.resp.getattr.attributes,
301 	    new_op->downcall.resp.getattr.link_target);
302 	if (ret) {
303 		ret = -ESTALE;
304 		goto out;
305 	}
306 
307 	switch (type) {
308 	case S_IFREG:
309 		inode->i_flags = orangefs_inode_flags(&new_op->
310 		    downcall.resp.getattr.attributes);
311 		if (request_mask & STATX_SIZE || new) {
312 			inode_size = (loff_t)new_op->
313 			    downcall.resp.getattr.attributes.size;
314 			rounded_up_size =
315 			    (inode_size + (4096 - (inode_size % 4096)));
316 			inode->i_size = inode_size;
317 			orangefs_inode->blksize =
318 			    new_op->downcall.resp.getattr.attributes.blksize;
319 			spin_lock(&inode->i_lock);
320 			inode->i_bytes = inode_size;
321 			inode->i_blocks =
322 			    (unsigned long)(rounded_up_size / 512);
323 			spin_unlock(&inode->i_lock);
324 		}
325 		break;
326 	case S_IFDIR:
327 		if (request_mask & STATX_SIZE || new) {
328 			inode->i_size = PAGE_SIZE;
329 			orangefs_inode->blksize = i_blocksize(inode);
330 			spin_lock(&inode->i_lock);
331 			inode_set_bytes(inode, inode->i_size);
332 			spin_unlock(&inode->i_lock);
333 		}
334 		set_nlink(inode, 1);
335 		break;
336 	case S_IFLNK:
337 		if (new) {
338 			inode->i_size = (loff_t)strlen(new_op->
339 			    downcall.resp.getattr.link_target);
340 			orangefs_inode->blksize = i_blocksize(inode);
341 			ret = strscpy(orangefs_inode->link_target,
342 			    new_op->downcall.resp.getattr.link_target,
343 			    ORANGEFS_NAME_MAX);
344 			if (ret == -E2BIG) {
345 				ret = -EIO;
346 				goto out;
347 			}
348 			inode->i_link = orangefs_inode->link_target;
349 		}
350 		break;
351 	}
352 
353 	inode->i_uid = make_kuid(&init_user_ns, new_op->
354 	    downcall.resp.getattr.attributes.owner);
355 	inode->i_gid = make_kgid(&init_user_ns, new_op->
356 	    downcall.resp.getattr.attributes.group);
357 	inode->i_atime.tv_sec = (time64_t)new_op->
358 	    downcall.resp.getattr.attributes.atime;
359 	inode->i_mtime.tv_sec = (time64_t)new_op->
360 	    downcall.resp.getattr.attributes.mtime;
361 	inode->i_ctime.tv_sec = (time64_t)new_op->
362 	    downcall.resp.getattr.attributes.ctime;
363 	inode->i_atime.tv_nsec = 0;
364 	inode->i_mtime.tv_nsec = 0;
365 	inode->i_ctime.tv_nsec = 0;
366 
367 	/* special case: mark the root inode as sticky */
368 	inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
369 	    orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
370 
371 	orangefs_inode->getattr_time = jiffies +
372 	    orangefs_getattr_timeout_msecs*HZ/1000;
373 	if (request_mask & STATX_SIZE || new)
374 		orangefs_inode->getattr_mask = STATX_BASIC_STATS;
375 	else
376 		orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE;
377 	ret = 0;
378 out:
379 	op_release(new_op);
380 	return ret;
381 }
382 
383 int orangefs_inode_check_changed(struct inode *inode)
384 {
385 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
386 	struct orangefs_kernel_op_s *new_op;
387 	int ret;
388 
389 	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
390 	    get_khandle_from_ino(inode));
391 
392 	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
393 	if (!new_op)
394 		return -ENOMEM;
395 	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
396 	new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
397 	    ORANGEFS_ATTR_SYS_LNK_TARGET;
398 
399 	ret = service_operation(new_op, __func__,
400 	    get_interruptible_flag(inode));
401 	if (ret != 0)
402 		goto out;
403 
404 	ret = orangefs_inode_is_stale(inode, 0,
405 	    &new_op->downcall.resp.getattr.attributes,
406 	    new_op->downcall.resp.getattr.link_target);
407 out:
408 	op_release(new_op);
409 	return ret;
410 }
411 
412 /*
413  * issues a orangefs setattr request to make sure the new attribute values
414  * take effect if successful.  returns 0 on success; -errno otherwise
415  */
416 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
417 {
418 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
419 	struct orangefs_kernel_op_s *new_op;
420 	int ret;
421 
422 	new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
423 	if (!new_op)
424 		return -ENOMEM;
425 
426 	new_op->upcall.req.setattr.refn = orangefs_inode->refn;
427 	ret = copy_attributes_from_inode(inode,
428 		       &new_op->upcall.req.setattr.attributes,
429 		       iattr);
430 	if (ret >= 0) {
431 		ret = service_operation(new_op, __func__,
432 				get_interruptible_flag(inode));
433 
434 		gossip_debug(GOSSIP_UTILS_DEBUG,
435 			     "orangefs_inode_setattr: returning %d\n",
436 			     ret);
437 	}
438 
439 	op_release(new_op);
440 
441 	if (ret == 0)
442 		orangefs_inode->getattr_time = jiffies - 1;
443 
444 	return ret;
445 }
446 
447 void orangefs_make_bad_inode(struct inode *inode)
448 {
449 	if (is_root_handle(inode)) {
450 		/*
451 		 * if this occurs, the pvfs2-client-core was killed but we
452 		 * can't afford to lose the inode operations and such
453 		 * associated with the root handle in any case.
454 		 */
455 		gossip_debug(GOSSIP_UTILS_DEBUG,
456 			     "*** NOT making bad root inode %pU\n",
457 			     get_khandle_from_ino(inode));
458 	} else {
459 		gossip_debug(GOSSIP_UTILS_DEBUG,
460 			     "*** making bad inode %pU\n",
461 			     get_khandle_from_ino(inode));
462 		make_bad_inode(inode);
463 	}
464 }
465 
/*
 * The following is a very dirty hack that is now a permanent part of the
 * ORANGEFS protocol. See protocol.h for more error definitions.
 */

/*
 * Lookup table from an ORANGEFS error number (the array index) to the
 * corresponding errno value.  The order matches include/orangefs-types.h
 * in the OrangeFS source.  The table is only ever read, so it is const.
 */
static const int PINT_errno_mapping[] = {
	0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
	EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
	EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
	ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
	EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
	EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
	ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
	EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
	ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
	EACCES, ECONNRESET, ERANGE
};
484 
485 int orangefs_normalize_to_errno(__s32 error_code)
486 {
487 	__u32 i;
488 
489 	/* Success */
490 	if (error_code == 0) {
491 		return 0;
492 	/*
493 	 * This shouldn't ever happen. If it does it should be fixed on the
494 	 * server.
495 	 */
496 	} else if (error_code > 0) {
497 		gossip_err("orangefs: error status receieved.\n");
498 		gossip_err("orangefs: assuming error code is inverted.\n");
499 		error_code = -error_code;
500 	}
501 
502 	/*
503 	 * XXX: This is very bad since error codes from ORANGEFS may not be
504 	 * suitable for return into userspace.
505 	 */
506 
507 	/*
508 	 * Convert ORANGEFS error values into errno values suitable for return
509 	 * from the kernel.
510 	 */
511 	if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
512 		if (((-error_code) &
513 		    (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
514 		    ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
515 			/*
516 			 * cancellation error codes generally correspond to
517 			 * a timeout from the client's perspective
518 			 */
519 			error_code = -ETIMEDOUT;
520 		} else {
521 			/* assume a default error code */
522 			gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code);
523 			error_code = -EINVAL;
524 		}
525 
526 	/* Convert ORANGEFS encoded errno values into regular errno values. */
527 	} else if ((-error_code) & ORANGEFS_ERROR_BIT) {
528 		i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
529 		if (i < ARRAY_SIZE(PINT_errno_mapping))
530 			error_code = -PINT_errno_mapping[i];
531 		else
532 			error_code = -EINVAL;
533 
534 	/*
535 	 * Only ORANGEFS protocol error codes should ever come here. Otherwise
536 	 * there is a bug somewhere.
537 	 */
538 	} else {
539 		gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n");
540 	}
541 	return error_code;
542 }
543 
544 #define NUM_MODES 11
545 __s32 ORANGEFS_util_translate_mode(int mode)
546 {
547 	int ret = 0;
548 	int i = 0;
549 	static int modes[NUM_MODES] = {
550 		S_IXOTH, S_IWOTH, S_IROTH,
551 		S_IXGRP, S_IWGRP, S_IRGRP,
552 		S_IXUSR, S_IWUSR, S_IRUSR,
553 		S_ISGID, S_ISUID
554 	};
555 	static int orangefs_modes[NUM_MODES] = {
556 		ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
557 		ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
558 		ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
559 		ORANGEFS_G_SGID, ORANGEFS_U_SUID
560 	};
561 
562 	for (i = 0; i < NUM_MODES; i++)
563 		if (mode & modes[i])
564 			ret |= orangefs_modes[i];
565 
566 	return ret;
567 }
568 #undef NUM_MODES
569