xref: /openbmc/linux/fs/orangefs/orangefs-utils.c (revision 56c6eed1)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * (C) 2001 Clemson University and The University of Chicago
4  *
5  * See COPYING in top-level directory.
6  */
7 #include <linux/kernel.h>
8 #include "protocol.h"
9 #include "orangefs-kernel.h"
10 #include "orangefs-dev-proto.h"
11 #include "orangefs-bufmap.h"
12 
13 __s32 fsid_of_op(struct orangefs_kernel_op_s *op)
14 {
15 	__s32 fsid = ORANGEFS_FS_ID_NULL;
16 
17 	if (op) {
18 		switch (op->upcall.type) {
19 		case ORANGEFS_VFS_OP_FILE_IO:
20 			fsid = op->upcall.req.io.refn.fs_id;
21 			break;
22 		case ORANGEFS_VFS_OP_LOOKUP:
23 			fsid = op->upcall.req.lookup.parent_refn.fs_id;
24 			break;
25 		case ORANGEFS_VFS_OP_CREATE:
26 			fsid = op->upcall.req.create.parent_refn.fs_id;
27 			break;
28 		case ORANGEFS_VFS_OP_GETATTR:
29 			fsid = op->upcall.req.getattr.refn.fs_id;
30 			break;
31 		case ORANGEFS_VFS_OP_REMOVE:
32 			fsid = op->upcall.req.remove.parent_refn.fs_id;
33 			break;
34 		case ORANGEFS_VFS_OP_MKDIR:
35 			fsid = op->upcall.req.mkdir.parent_refn.fs_id;
36 			break;
37 		case ORANGEFS_VFS_OP_READDIR:
38 			fsid = op->upcall.req.readdir.refn.fs_id;
39 			break;
40 		case ORANGEFS_VFS_OP_SETATTR:
41 			fsid = op->upcall.req.setattr.refn.fs_id;
42 			break;
43 		case ORANGEFS_VFS_OP_SYMLINK:
44 			fsid = op->upcall.req.sym.parent_refn.fs_id;
45 			break;
46 		case ORANGEFS_VFS_OP_RENAME:
47 			fsid = op->upcall.req.rename.old_parent_refn.fs_id;
48 			break;
49 		case ORANGEFS_VFS_OP_STATFS:
50 			fsid = op->upcall.req.statfs.fs_id;
51 			break;
52 		case ORANGEFS_VFS_OP_TRUNCATE:
53 			fsid = op->upcall.req.truncate.refn.fs_id;
54 			break;
55 		case ORANGEFS_VFS_OP_RA_FLUSH:
56 			fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
57 			break;
58 		case ORANGEFS_VFS_OP_FS_UMOUNT:
59 			fsid = op->upcall.req.fs_umount.fs_id;
60 			break;
61 		case ORANGEFS_VFS_OP_GETXATTR:
62 			fsid = op->upcall.req.getxattr.refn.fs_id;
63 			break;
64 		case ORANGEFS_VFS_OP_SETXATTR:
65 			fsid = op->upcall.req.setxattr.refn.fs_id;
66 			break;
67 		case ORANGEFS_VFS_OP_LISTXATTR:
68 			fsid = op->upcall.req.listxattr.refn.fs_id;
69 			break;
70 		case ORANGEFS_VFS_OP_REMOVEXATTR:
71 			fsid = op->upcall.req.removexattr.refn.fs_id;
72 			break;
73 		case ORANGEFS_VFS_OP_FSYNC:
74 			fsid = op->upcall.req.fsync.refn.fs_id;
75 			break;
76 		default:
77 			break;
78 		}
79 	}
80 	return fsid;
81 }
82 
83 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
84 {
85 	int flags = 0;
86 	if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
87 		flags |= S_IMMUTABLE;
88 	else
89 		flags &= ~S_IMMUTABLE;
90 	if (attrs->flags & ORANGEFS_APPEND_FL)
91 		flags |= S_APPEND;
92 	else
93 		flags &= ~S_APPEND;
94 	if (attrs->flags & ORANGEFS_NOATIME_FL)
95 		flags |= S_NOATIME;
96 	else
97 		flags &= ~S_NOATIME;
98 	return flags;
99 }
100 
101 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
102 {
103 	int perm_mode = 0;
104 
105 	if (attrs->perms & ORANGEFS_O_EXECUTE)
106 		perm_mode |= S_IXOTH;
107 	if (attrs->perms & ORANGEFS_O_WRITE)
108 		perm_mode |= S_IWOTH;
109 	if (attrs->perms & ORANGEFS_O_READ)
110 		perm_mode |= S_IROTH;
111 
112 	if (attrs->perms & ORANGEFS_G_EXECUTE)
113 		perm_mode |= S_IXGRP;
114 	if (attrs->perms & ORANGEFS_G_WRITE)
115 		perm_mode |= S_IWGRP;
116 	if (attrs->perms & ORANGEFS_G_READ)
117 		perm_mode |= S_IRGRP;
118 
119 	if (attrs->perms & ORANGEFS_U_EXECUTE)
120 		perm_mode |= S_IXUSR;
121 	if (attrs->perms & ORANGEFS_U_WRITE)
122 		perm_mode |= S_IWUSR;
123 	if (attrs->perms & ORANGEFS_U_READ)
124 		perm_mode |= S_IRUSR;
125 
126 	if (attrs->perms & ORANGEFS_G_SGID)
127 		perm_mode |= S_ISGID;
128 	if (attrs->perms & ORANGEFS_U_SUID)
129 		perm_mode |= S_ISUID;
130 
131 	return perm_mode;
132 }
133 
134 /*
135  * NOTE: in kernel land, we never use the sys_attr->link_target for
136  * anything, so don't bother copying it into the sys_attr object here.
137  */
138 static inline int copy_attributes_from_inode(struct inode *inode,
139 					     struct ORANGEFS_sys_attr_s *attrs,
140 					     struct iattr *iattr)
141 {
142 	umode_t tmp_mode;
143 
144 	if (!iattr || !inode || !attrs) {
145 		gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
146 			   "in copy_attributes_from_inode!\n",
147 			   iattr,
148 			   inode,
149 			   attrs);
150 		return -EINVAL;
151 	}
152 	/*
153 	 * We need to be careful to only copy the attributes out of the
154 	 * iattr object that we know are valid.
155 	 */
156 	attrs->mask = 0;
157 	if (iattr->ia_valid & ATTR_UID) {
158 		attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid);
159 		attrs->mask |= ORANGEFS_ATTR_SYS_UID;
160 		gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
161 	}
162 	if (iattr->ia_valid & ATTR_GID) {
163 		attrs->group = from_kgid(&init_user_ns, iattr->ia_gid);
164 		attrs->mask |= ORANGEFS_ATTR_SYS_GID;
165 		gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
166 	}
167 
168 	if (iattr->ia_valid & ATTR_ATIME) {
169 		attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
170 		if (iattr->ia_valid & ATTR_ATIME_SET) {
171 			attrs->atime = (time64_t)iattr->ia_atime.tv_sec;
172 			attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
173 		}
174 	}
175 	if (iattr->ia_valid & ATTR_MTIME) {
176 		attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
177 		if (iattr->ia_valid & ATTR_MTIME_SET) {
178 			attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec;
179 			attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
180 		}
181 	}
182 	if (iattr->ia_valid & ATTR_CTIME)
183 		attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
184 
185 	/*
186 	 * ORANGEFS cannot set size with a setattr operation. Probably not
187 	 * likely to be requested through the VFS, but just in case, don't
188 	 * worry about ATTR_SIZE
189 	 */
190 
191 	if (iattr->ia_valid & ATTR_MODE) {
192 		tmp_mode = iattr->ia_mode;
193 		if (tmp_mode & (S_ISVTX)) {
194 			if (is_root_handle(inode)) {
195 				/*
196 				 * allow sticky bit to be set on root (since
197 				 * it shows up that way by default anyhow),
198 				 * but don't show it to the server
199 				 */
200 				tmp_mode -= S_ISVTX;
201 			} else {
202 				gossip_debug(GOSSIP_UTILS_DEBUG,
203 					"%s: setting sticky bit not supported.\n",
204 					__func__);
205 				return -EINVAL;
206 			}
207 		}
208 
209 		if (tmp_mode & (S_ISUID)) {
210 			gossip_debug(GOSSIP_UTILS_DEBUG,
211 				"%s: setting setuid bit not supported.\n",
212 				__func__);
213 			return -EINVAL;
214 		}
215 
216 		attrs->perms = ORANGEFS_util_translate_mode(tmp_mode);
217 		attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
218 	}
219 
220 	return 0;
221 }
222 
223 static int orangefs_inode_type(enum orangefs_ds_type objtype)
224 {
225 	if (objtype == ORANGEFS_TYPE_METAFILE)
226 		return S_IFREG;
227 	else if (objtype == ORANGEFS_TYPE_DIRECTORY)
228 		return S_IFDIR;
229 	else if (objtype == ORANGEFS_TYPE_SYMLINK)
230 		return S_IFLNK;
231 	else
232 		return -1;
233 }
234 
235 static void orangefs_make_bad_inode(struct inode *inode)
236 {
237 	if (is_root_handle(inode)) {
238 		/*
239 		 * if this occurs, the pvfs2-client-core was killed but we
240 		 * can't afford to lose the inode operations and such
241 		 * associated with the root handle in any case.
242 		 */
243 		gossip_debug(GOSSIP_UTILS_DEBUG,
244 			     "*** NOT making bad root inode %pU\n",
245 			     get_khandle_from_ino(inode));
246 	} else {
247 		gossip_debug(GOSSIP_UTILS_DEBUG,
248 			     "*** making bad inode %pU\n",
249 			     get_khandle_from_ino(inode));
250 		make_bad_inode(inode);
251 	}
252 }
253 
254 static int orangefs_inode_is_stale(struct inode *inode,
255     struct ORANGEFS_sys_attr_s *attrs, char *link_target)
256 {
257 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
258 	int type = orangefs_inode_type(attrs->objtype);
259 	/*
260 	 * If the inode type or symlink target have changed then this
261 	 * inode is stale.
262 	 */
263 	if (type == -1 || !(inode->i_mode & type)) {
264 		orangefs_make_bad_inode(inode);
265 		return 1;
266 	}
267 	if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
268 	    link_target, ORANGEFS_NAME_MAX)) {
269 		orangefs_make_bad_inode(inode);
270 		return 1;
271 	}
272 	return 0;
273 }
274 
275 int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
276     u32 request_mask)
277 {
278 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
279 	struct orangefs_kernel_op_s *new_op;
280 	loff_t inode_size;
281 	int ret, type;
282 
283 	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
284 	    get_khandle_from_ino(inode));
285 
286 	if (!new && !bypass) {
287 		/*
288 		 * Must have all the attributes in the mask and be within cache
289 		 * time.
290 		 */
291 		if ((request_mask & orangefs_inode->getattr_mask) ==
292 		    request_mask &&
293 		    time_before(jiffies, orangefs_inode->getattr_time))
294 			return 0;
295 	}
296 
297 	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
298 	if (!new_op)
299 		return -ENOMEM;
300 	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
301 	/*
302 	 * Size is the hardest attribute to get.  The incremental cost of any
303 	 * other attribute is essentially zero.
304 	 */
305 	if (request_mask & STATX_SIZE || new)
306 		new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
307 	else
308 		new_op->upcall.req.getattr.mask =
309 		    ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
310 
311 	ret = service_operation(new_op, __func__,
312 	    get_interruptible_flag(inode));
313 	if (ret != 0)
314 		goto out;
315 
316 	if (!new) {
317 		ret = orangefs_inode_is_stale(inode,
318 		    &new_op->downcall.resp.getattr.attributes,
319 		    new_op->downcall.resp.getattr.link_target);
320 		if (ret) {
321 			ret = -ESTALE;
322 			goto out;
323 		}
324 	}
325 
326 	type = orangefs_inode_type(new_op->
327 	    downcall.resp.getattr.attributes.objtype);
328 	switch (type) {
329 	case S_IFREG:
330 		inode->i_flags = orangefs_inode_flags(&new_op->
331 		    downcall.resp.getattr.attributes);
332 		if (request_mask & STATX_SIZE || new) {
333 			inode_size = (loff_t)new_op->
334 			    downcall.resp.getattr.attributes.size;
335 			inode->i_size = inode_size;
336 			inode->i_blkbits = ffs(new_op->downcall.resp.getattr.
337 			    attributes.blksize);
338 			spin_lock(&inode->i_lock);
339 			inode->i_bytes = inode_size;
340 			inode->i_blocks =
341 			    (inode_size + 512 - inode_size % 512)/512;
342 			spin_unlock(&inode->i_lock);
343 		}
344 		break;
345 	case S_IFDIR:
346 		if (request_mask & STATX_SIZE || new) {
347 			inode->i_size = PAGE_SIZE;
348 			spin_lock(&inode->i_lock);
349 			inode_set_bytes(inode, inode->i_size);
350 			spin_unlock(&inode->i_lock);
351 		}
352 		set_nlink(inode, 1);
353 		break;
354 	case S_IFLNK:
355 		if (new) {
356 			inode->i_size = (loff_t)strlen(new_op->
357 			    downcall.resp.getattr.link_target);
358 			ret = strscpy(orangefs_inode->link_target,
359 			    new_op->downcall.resp.getattr.link_target,
360 			    ORANGEFS_NAME_MAX);
361 			if (ret == -E2BIG) {
362 				ret = -EIO;
363 				goto out;
364 			}
365 			inode->i_link = orangefs_inode->link_target;
366 		}
367 		break;
368 	/* i.e. -1 */
369 	default:
370 		/* XXX: ESTALE?  This is what is done if it is not new. */
371 		orangefs_make_bad_inode(inode);
372 		ret = -ESTALE;
373 		goto out;
374 	}
375 
376 	inode->i_uid = make_kuid(&init_user_ns, new_op->
377 	    downcall.resp.getattr.attributes.owner);
378 	inode->i_gid = make_kgid(&init_user_ns, new_op->
379 	    downcall.resp.getattr.attributes.group);
380 	inode->i_atime.tv_sec = (time64_t)new_op->
381 	    downcall.resp.getattr.attributes.atime;
382 	inode->i_mtime.tv_sec = (time64_t)new_op->
383 	    downcall.resp.getattr.attributes.mtime;
384 	inode->i_ctime.tv_sec = (time64_t)new_op->
385 	    downcall.resp.getattr.attributes.ctime;
386 	inode->i_atime.tv_nsec = 0;
387 	inode->i_mtime.tv_nsec = 0;
388 	inode->i_ctime.tv_nsec = 0;
389 
390 	/* special case: mark the root inode as sticky */
391 	inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
392 	    orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
393 
394 	orangefs_inode->getattr_time = jiffies +
395 	    orangefs_getattr_timeout_msecs*HZ/1000;
396 	if (request_mask & STATX_SIZE || new)
397 		orangefs_inode->getattr_mask = STATX_BASIC_STATS;
398 	else
399 		orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE;
400 	ret = 0;
401 out:
402 	op_release(new_op);
403 	return ret;
404 }
405 
406 int orangefs_inode_check_changed(struct inode *inode)
407 {
408 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
409 	struct orangefs_kernel_op_s *new_op;
410 	int ret;
411 
412 	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
413 	    get_khandle_from_ino(inode));
414 
415 	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
416 	if (!new_op)
417 		return -ENOMEM;
418 	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
419 	new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
420 	    ORANGEFS_ATTR_SYS_LNK_TARGET;
421 
422 	ret = service_operation(new_op, __func__,
423 	    get_interruptible_flag(inode));
424 	if (ret != 0)
425 		goto out;
426 
427 	ret = orangefs_inode_is_stale(inode,
428 	    &new_op->downcall.resp.getattr.attributes,
429 	    new_op->downcall.resp.getattr.link_target);
430 out:
431 	op_release(new_op);
432 	return ret;
433 }
434 
435 /*
436  * issues a orangefs setattr request to make sure the new attribute values
437  * take effect if successful.  returns 0 on success; -errno otherwise
438  */
439 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
440 {
441 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
442 	struct orangefs_kernel_op_s *new_op;
443 	int ret;
444 
445 	new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
446 	if (!new_op)
447 		return -ENOMEM;
448 
449 	new_op->upcall.req.setattr.refn = orangefs_inode->refn;
450 	ret = copy_attributes_from_inode(inode,
451 		       &new_op->upcall.req.setattr.attributes,
452 		       iattr);
453 	if (ret >= 0) {
454 		ret = service_operation(new_op, __func__,
455 				get_interruptible_flag(inode));
456 
457 		gossip_debug(GOSSIP_UTILS_DEBUG,
458 			     "orangefs_inode_setattr: returning %d\n",
459 			     ret);
460 	}
461 
462 	op_release(new_op);
463 
464 	if (ret == 0)
465 		orangefs_inode->getattr_time = jiffies - 1;
466 
467 	return ret;
468 }
469 
470 /*
471  * The following is a very dirty hack that is now a permanent part of the
472  * ORANGEFS protocol. See protocol.h for more error definitions.
473  */
474 
/*
 * Errno values in ORANGEFS error-number order; entry 0 means success.
 * The order matches include/orangefs-types.h in the OrangeFS source, so
 * entries must never be reordered or removed.
 *
 * The table is only ever read, hence const.
 */
static const int PINT_errno_mapping[] = {
	0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
	EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
	EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
	ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
	EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
	EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
	ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
	EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
	ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
	EACCES, ECONNRESET, ERANGE
};
488 
489 int orangefs_normalize_to_errno(__s32 error_code)
490 {
491 	__u32 i;
492 
493 	/* Success */
494 	if (error_code == 0) {
495 		return 0;
496 	/*
497 	 * This shouldn't ever happen. If it does it should be fixed on the
498 	 * server.
499 	 */
500 	} else if (error_code > 0) {
501 		gossip_err("orangefs: error status received.\n");
502 		gossip_err("orangefs: assuming error code is inverted.\n");
503 		error_code = -error_code;
504 	}
505 
506 	/*
507 	 * XXX: This is very bad since error codes from ORANGEFS may not be
508 	 * suitable for return into userspace.
509 	 */
510 
511 	/*
512 	 * Convert ORANGEFS error values into errno values suitable for return
513 	 * from the kernel.
514 	 */
515 	if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
516 		if (((-error_code) &
517 		    (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
518 		    ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
519 			/*
520 			 * cancellation error codes generally correspond to
521 			 * a timeout from the client's perspective
522 			 */
523 			error_code = -ETIMEDOUT;
524 		} else {
525 			/* assume a default error code */
526 			gossip_err("%s: bad error code :%d:.\n",
527 				__func__,
528 				error_code);
529 			error_code = -EINVAL;
530 		}
531 
532 	/* Convert ORANGEFS encoded errno values into regular errno values. */
533 	} else if ((-error_code) & ORANGEFS_ERROR_BIT) {
534 		i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
535 		if (i < ARRAY_SIZE(PINT_errno_mapping))
536 			error_code = -PINT_errno_mapping[i];
537 		else
538 			error_code = -EINVAL;
539 
540 	/*
541 	 * Only ORANGEFS protocol error codes should ever come here. Otherwise
542 	 * there is a bug somewhere.
543 	 */
544 	} else {
545 		gossip_err("%s: unknown error code.\n", __func__);
546 		error_code = -EINVAL;
547 	}
548 	return error_code;
549 }
550 
551 #define NUM_MODES 11
552 __s32 ORANGEFS_util_translate_mode(int mode)
553 {
554 	int ret = 0;
555 	int i = 0;
556 	static int modes[NUM_MODES] = {
557 		S_IXOTH, S_IWOTH, S_IROTH,
558 		S_IXGRP, S_IWGRP, S_IRGRP,
559 		S_IXUSR, S_IWUSR, S_IRUSR,
560 		S_ISGID, S_ISUID
561 	};
562 	static int orangefs_modes[NUM_MODES] = {
563 		ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
564 		ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
565 		ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
566 		ORANGEFS_G_SGID, ORANGEFS_U_SUID
567 	};
568 
569 	for (i = 0; i < NUM_MODES; i++)
570 		if (mode & modes[i])
571 			ret |= orangefs_modes[i];
572 
573 	return ret;
574 }
575 #undef NUM_MODES
576