1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * (C) 2001 Clemson University and The University of Chicago 4 * 5 * See COPYING in top-level directory. 6 */ 7 #include <linux/kernel.h> 8 #include "protocol.h" 9 #include "orangefs-kernel.h" 10 #include "orangefs-dev-proto.h" 11 #include "orangefs-bufmap.h" 12 13 __s32 fsid_of_op(struct orangefs_kernel_op_s *op) 14 { 15 __s32 fsid = ORANGEFS_FS_ID_NULL; 16 17 if (op) { 18 switch (op->upcall.type) { 19 case ORANGEFS_VFS_OP_FILE_IO: 20 fsid = op->upcall.req.io.refn.fs_id; 21 break; 22 case ORANGEFS_VFS_OP_LOOKUP: 23 fsid = op->upcall.req.lookup.parent_refn.fs_id; 24 break; 25 case ORANGEFS_VFS_OP_CREATE: 26 fsid = op->upcall.req.create.parent_refn.fs_id; 27 break; 28 case ORANGEFS_VFS_OP_GETATTR: 29 fsid = op->upcall.req.getattr.refn.fs_id; 30 break; 31 case ORANGEFS_VFS_OP_REMOVE: 32 fsid = op->upcall.req.remove.parent_refn.fs_id; 33 break; 34 case ORANGEFS_VFS_OP_MKDIR: 35 fsid = op->upcall.req.mkdir.parent_refn.fs_id; 36 break; 37 case ORANGEFS_VFS_OP_READDIR: 38 fsid = op->upcall.req.readdir.refn.fs_id; 39 break; 40 case ORANGEFS_VFS_OP_SETATTR: 41 fsid = op->upcall.req.setattr.refn.fs_id; 42 break; 43 case ORANGEFS_VFS_OP_SYMLINK: 44 fsid = op->upcall.req.sym.parent_refn.fs_id; 45 break; 46 case ORANGEFS_VFS_OP_RENAME: 47 fsid = op->upcall.req.rename.old_parent_refn.fs_id; 48 break; 49 case ORANGEFS_VFS_OP_STATFS: 50 fsid = op->upcall.req.statfs.fs_id; 51 break; 52 case ORANGEFS_VFS_OP_TRUNCATE: 53 fsid = op->upcall.req.truncate.refn.fs_id; 54 break; 55 case ORANGEFS_VFS_OP_RA_FLUSH: 56 fsid = op->upcall.req.ra_cache_flush.refn.fs_id; 57 break; 58 case ORANGEFS_VFS_OP_FS_UMOUNT: 59 fsid = op->upcall.req.fs_umount.fs_id; 60 break; 61 case ORANGEFS_VFS_OP_GETXATTR: 62 fsid = op->upcall.req.getxattr.refn.fs_id; 63 break; 64 case ORANGEFS_VFS_OP_SETXATTR: 65 fsid = op->upcall.req.setxattr.refn.fs_id; 66 break; 67 case ORANGEFS_VFS_OP_LISTXATTR: 68 fsid = op->upcall.req.listxattr.refn.fs_id; 69 break; 70 case ORANGEFS_VFS_OP_REMOVEXATTR: 71 fsid = op->upcall.req.removexattr.refn.fs_id; 72 break; 73 case ORANGEFS_VFS_OP_FSYNC: 74 fsid = op->upcall.req.fsync.refn.fs_id; 75 break; 76 default: 77 break; 78 } 79 } 80 return fsid; 81 } 82 83 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs) 84 { 85 int flags = 0; 86 if (attrs->flags & ORANGEFS_IMMUTABLE_FL) 87 flags |= S_IMMUTABLE; 88 else 89 flags &= ~S_IMMUTABLE; 90 if (attrs->flags & ORANGEFS_APPEND_FL) 91 flags |= S_APPEND; 92 else 93 flags &= ~S_APPEND; 94 if (attrs->flags & ORANGEFS_NOATIME_FL) 95 flags |= S_NOATIME; 96 else 97 flags &= ~S_NOATIME; 98 return flags; 99 } 100 101 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs) 102 { 103 int perm_mode = 0; 104 105 if (attrs->perms & ORANGEFS_O_EXECUTE) 106 perm_mode |= S_IXOTH; 107 if (attrs->perms & ORANGEFS_O_WRITE) 108 perm_mode |= S_IWOTH; 109 if (attrs->perms & ORANGEFS_O_READ) 110 perm_mode |= S_IROTH; 111 112 if (attrs->perms & ORANGEFS_G_EXECUTE) 113 perm_mode |= S_IXGRP; 114 if (attrs->perms & ORANGEFS_G_WRITE) 115 perm_mode |= S_IWGRP; 116 if (attrs->perms & ORANGEFS_G_READ) 117 perm_mode |= S_IRGRP; 118 119 if (attrs->perms & ORANGEFS_U_EXECUTE) 120 perm_mode |= S_IXUSR; 121 if (attrs->perms & ORANGEFS_U_WRITE) 122 perm_mode |= S_IWUSR; 123 if (attrs->perms & ORANGEFS_U_READ) 124 perm_mode |= S_IRUSR; 125 126 if (attrs->perms & ORANGEFS_G_SGID) 127 perm_mode |= S_ISGID; 128 if (attrs->perms & ORANGEFS_U_SUID) 129 perm_mode |= S_ISUID; 130 131 return perm_mode; 132 } 133 134 /* 135 * NOTE: in kernel land, we never use the sys_attr->link_target for 136 * anything, so don't bother copying it into the sys_attr object here. 137 */ 138 static inline int copy_attributes_from_inode(struct inode *inode, 139 struct ORANGEFS_sys_attr_s *attrs, 140 struct iattr *iattr) 141 { 142 umode_t tmp_mode; 143 144 if (!iattr || !inode || !attrs) { 145 gossip_err("NULL iattr (%p), inode (%p), attrs (%p) " 146 "in copy_attributes_from_inode!\n", 147 iattr, 148 inode, 149 attrs); 150 return -EINVAL; 151 } 152 /* 153 * We need to be careful to only copy the attributes out of the 154 * iattr object that we know are valid. 155 */ 156 attrs->mask = 0; 157 if (iattr->ia_valid & ATTR_UID) { 158 attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid); 159 attrs->mask |= ORANGEFS_ATTR_SYS_UID; 160 gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner); 161 } 162 if (iattr->ia_valid & ATTR_GID) { 163 attrs->group = from_kgid(&init_user_ns, iattr->ia_gid); 164 attrs->mask |= ORANGEFS_ATTR_SYS_GID; 165 gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group); 166 } 167 168 if (iattr->ia_valid & ATTR_ATIME) { 169 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME; 170 if (iattr->ia_valid & ATTR_ATIME_SET) { 171 attrs->atime = (time64_t)iattr->ia_atime.tv_sec; 172 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET; 173 } 174 } 175 if (iattr->ia_valid & ATTR_MTIME) { 176 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME; 177 if (iattr->ia_valid & ATTR_MTIME_SET) { 178 attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec; 179 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET; 180 } 181 } 182 if (iattr->ia_valid & ATTR_CTIME) 183 attrs->mask |= ORANGEFS_ATTR_SYS_CTIME; 184 185 /* 186 * ORANGEFS cannot set size with a setattr operation. Probably not likely 187 * to be requested through the VFS, but just in case, don't worry about 188 * ATTR_SIZE 189 */ 190 191 if (iattr->ia_valid & ATTR_MODE) { 192 tmp_mode = iattr->ia_mode; 193 if (tmp_mode & (S_ISVTX)) { 194 if (is_root_handle(inode)) { 195 /* 196 * allow sticky bit to be set on root (since 197 * it shows up that way by default anyhow), 198 * but don't show it to the server 199 */ 200 tmp_mode -= S_ISVTX; 201 } else { 202 gossip_debug(GOSSIP_UTILS_DEBUG, 203 "User attempted to set sticky bit on non-root directory; returning EINVAL.\n"); 204 return -EINVAL; 205 } 206 } 207 208 if (tmp_mode & (S_ISUID)) { 209 gossip_debug(GOSSIP_UTILS_DEBUG, 210 "Attempting to set setuid bit (not supported); returning EINVAL.\n"); 211 return -EINVAL; 212 } 213 214 attrs->perms = ORANGEFS_util_translate_mode(tmp_mode); 215 attrs->mask |= ORANGEFS_ATTR_SYS_PERM; 216 } 217 218 return 0; 219 } 220 221 static int orangefs_inode_type(enum orangefs_ds_type objtype) 222 { 223 if (objtype == ORANGEFS_TYPE_METAFILE) 224 return S_IFREG; 225 else if (objtype == ORANGEFS_TYPE_DIRECTORY) 226 return S_IFDIR; 227 else if (objtype == ORANGEFS_TYPE_SYMLINK) 228 return S_IFLNK; 229 else 230 return -1; 231 } 232 233 static void orangefs_make_bad_inode(struct inode *inode) 234 { 235 if (is_root_handle(inode)) { 236 /* 237 * if this occurs, the pvfs2-client-core was killed but we 238 * can't afford to lose the inode operations and such 239 * associated with the root handle in any case. 240 */ 241 gossip_debug(GOSSIP_UTILS_DEBUG, 242 "*** NOT making bad root inode %pU\n", 243 get_khandle_from_ino(inode)); 244 } else { 245 gossip_debug(GOSSIP_UTILS_DEBUG, 246 "*** making bad inode %pU\n", 247 get_khandle_from_ino(inode)); 248 make_bad_inode(inode); 249 } 250 } 251 252 static int orangefs_inode_is_stale(struct inode *inode, 253 struct ORANGEFS_sys_attr_s *attrs, char *link_target) 254 { 255 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 256 int type = orangefs_inode_type(attrs->objtype); 257 /* 258 * If the inode type or symlink target have changed then this 259 * inode is stale. 260 */ 261 if (type == -1 || !(inode->i_mode & type)) { 262 orangefs_make_bad_inode(inode); 263 return 1; 264 } 265 if (type == S_IFLNK && strncmp(orangefs_inode->link_target, 266 link_target, ORANGEFS_NAME_MAX)) { 267 orangefs_make_bad_inode(inode); 268 return 1; 269 } 270 return 0; 271 } 272 273 int orangefs_inode_getattr(struct inode *inode, int new, int bypass, 274 u32 request_mask) 275 { 276 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 277 struct orangefs_kernel_op_s *new_op; 278 loff_t inode_size, rounded_up_size; 279 int ret, type; 280 281 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__, 282 get_khandle_from_ino(inode)); 283 284 if (!new && !bypass) { 285 /* 286 * Must have all the attributes in the mask and be within cache 287 * time. 288 */ 289 if ((request_mask & orangefs_inode->getattr_mask) == 290 request_mask && 291 time_before(jiffies, orangefs_inode->getattr_time)) 292 return 0; 293 } 294 295 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); 296 if (!new_op) 297 return -ENOMEM; 298 new_op->upcall.req.getattr.refn = orangefs_inode->refn; 299 /* 300 * Size is the hardest attribute to get. The incremental cost of any 301 * other attribute is essentially zero. 302 */ 303 if (request_mask & STATX_SIZE || new) 304 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT; 305 else 306 new_op->upcall.req.getattr.mask = 307 ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE; 308 309 ret = service_operation(new_op, __func__, 310 get_interruptible_flag(inode)); 311 if (ret != 0) 312 goto out; 313 314 if (!new) { 315 ret = orangefs_inode_is_stale(inode, 316 &new_op->downcall.resp.getattr.attributes, 317 new_op->downcall.resp.getattr.link_target); 318 if (ret) { 319 ret = -ESTALE; 320 goto out; 321 } 322 } 323 324 type = orangefs_inode_type(new_op-> 325 downcall.resp.getattr.attributes.objtype); 326 switch (type) { 327 case S_IFREG: 328 inode->i_flags = orangefs_inode_flags(&new_op-> 329 downcall.resp.getattr.attributes); 330 if (request_mask & STATX_SIZE || new) { 331 inode_size = (loff_t)new_op-> 332 downcall.resp.getattr.attributes.size; 333 rounded_up_size = 334 (inode_size + (4096 - (inode_size % 4096))); 335 inode->i_size = inode_size; 336 orangefs_inode->blksize = 337 new_op->downcall.resp.getattr.attributes.blksize; 338 spin_lock(&inode->i_lock); 339 inode->i_bytes = inode_size; 340 inode->i_blocks = 341 (unsigned long)(rounded_up_size / 512); 342 spin_unlock(&inode->i_lock); 343 } 344 break; 345 case S_IFDIR: 346 if (request_mask & STATX_SIZE || new) { 347 inode->i_size = PAGE_SIZE; 348 orangefs_inode->blksize = i_blocksize(inode); 349 spin_lock(&inode->i_lock); 350 inode_set_bytes(inode, inode->i_size); 351 spin_unlock(&inode->i_lock); 352 } 353 set_nlink(inode, 1); 354 break; 355 case S_IFLNK: 356 if (new) { 357 inode->i_size = (loff_t)strlen(new_op-> 358 downcall.resp.getattr.link_target); 359 orangefs_inode->blksize = i_blocksize(inode); 360 ret = strscpy(orangefs_inode->link_target, 361 new_op->downcall.resp.getattr.link_target, 362 ORANGEFS_NAME_MAX); 363 if (ret == -E2BIG) { 364 ret = -EIO; 365 goto out; 366 } 367 inode->i_link = orangefs_inode->link_target; 368 } 369 break; 370 /* i.e. -1 */ 371 default: 372 /* XXX: ESTALE? This is what is done if it is not new. */ 373 orangefs_make_bad_inode(inode); 374 ret = -ESTALE; 375 goto out; 376 } 377 378 inode->i_uid = make_kuid(&init_user_ns, new_op-> 379 downcall.resp.getattr.attributes.owner); 380 inode->i_gid = make_kgid(&init_user_ns, new_op-> 381 downcall.resp.getattr.attributes.group); 382 inode->i_atime.tv_sec = (time64_t)new_op-> 383 downcall.resp.getattr.attributes.atime; 384 inode->i_mtime.tv_sec = (time64_t)new_op-> 385 downcall.resp.getattr.attributes.mtime; 386 inode->i_ctime.tv_sec = (time64_t)new_op-> 387 downcall.resp.getattr.attributes.ctime; 388 inode->i_atime.tv_nsec = 0; 389 inode->i_mtime.tv_nsec = 0; 390 inode->i_ctime.tv_nsec = 0; 391 392 /* special case: mark the root inode as sticky */ 393 inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) | 394 orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes); 395 396 orangefs_inode->getattr_time = jiffies + 397 orangefs_getattr_timeout_msecs*HZ/1000; 398 if (request_mask & STATX_SIZE || new) 399 orangefs_inode->getattr_mask = STATX_BASIC_STATS; 400 else 401 orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE; 402 ret = 0; 403 out: 404 op_release(new_op); 405 return ret; 406 } 407 408 int orangefs_inode_check_changed(struct inode *inode) 409 { 410 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 411 struct orangefs_kernel_op_s *new_op; 412 int ret; 413 414 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__, 415 get_khandle_from_ino(inode)); 416 417 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); 418 if (!new_op) 419 return -ENOMEM; 420 new_op->upcall.req.getattr.refn = orangefs_inode->refn; 421 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE | 422 ORANGEFS_ATTR_SYS_LNK_TARGET; 423 424 ret = service_operation(new_op, __func__, 425 get_interruptible_flag(inode)); 426 if (ret != 0) 427 goto out; 428 429 ret = orangefs_inode_is_stale(inode, 430 &new_op->downcall.resp.getattr.attributes, 431 new_op->downcall.resp.getattr.link_target); 432 out: 433 op_release(new_op); 434 return ret; 435 } 436 437 /* 438 * issues a orangefs setattr request to make sure the new attribute values 439 * take effect if successful. returns 0 on success; -errno otherwise 440 */ 441 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr) 442 { 443 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 444 struct orangefs_kernel_op_s *new_op; 445 int ret; 446 447 new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR); 448 if (!new_op) 449 return -ENOMEM; 450 451 new_op->upcall.req.setattr.refn = orangefs_inode->refn; 452 ret = copy_attributes_from_inode(inode, 453 &new_op->upcall.req.setattr.attributes, 454 iattr); 455 if (ret >= 0) { 456 ret = service_operation(new_op, __func__, 457 get_interruptible_flag(inode)); 458 459 gossip_debug(GOSSIP_UTILS_DEBUG, 460 "orangefs_inode_setattr: returning %d\n", 461 ret); 462 } 463 464 op_release(new_op); 465 466 if (ret == 0) 467 orangefs_inode->getattr_time = jiffies - 1; 468 469 return ret; 470 } 471 472 /* 473 * The following is a very dirty hack that is now a permanent part of the 474 * ORANGEFS protocol. See protocol.h for more error definitions. 475 */ 476 477 /* The order matches include/orangefs-types.h in the OrangeFS source. */ 478 static int PINT_errno_mapping[] = { 479 0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM, 480 EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE, 481 EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG, 482 ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH, 483 EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM, 484 EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE, 485 ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE, 486 EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS, 487 ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY, 488 EACCES, ECONNRESET, ERANGE 489 }; 490 491 int orangefs_normalize_to_errno(__s32 error_code) 492 { 493 __u32 i; 494 495 /* Success */ 496 if (error_code == 0) { 497 return 0; 498 /* 499 * This shouldn't ever happen. If it does it should be fixed on the 500 * server. 501 */ 502 } else if (error_code > 0) { 503 gossip_err("orangefs: error status receieved.\n"); 504 gossip_err("orangefs: assuming error code is inverted.\n"); 505 error_code = -error_code; 506 } 507 508 /* 509 * XXX: This is very bad since error codes from ORANGEFS may not be 510 * suitable for return into userspace. 511 */ 512 513 /* 514 * Convert ORANGEFS error values into errno values suitable for return 515 * from the kernel. 516 */ 517 if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) { 518 if (((-error_code) & 519 (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT| 520 ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) { 521 /* 522 * cancellation error codes generally correspond to 523 * a timeout from the client's perspective 524 */ 525 error_code = -ETIMEDOUT; 526 } else { 527 /* assume a default error code */ 528 gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code); 529 error_code = -EINVAL; 530 } 531 532 /* Convert ORANGEFS encoded errno values into regular errno values. */ 533 } else if ((-error_code) & ORANGEFS_ERROR_BIT) { 534 i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS); 535 if (i < ARRAY_SIZE(PINT_errno_mapping)) 536 error_code = -PINT_errno_mapping[i]; 537 else 538 error_code = -EINVAL; 539 540 /* 541 * Only ORANGEFS protocol error codes should ever come here. Otherwise 542 * there is a bug somewhere. 543 */ 544 } else { 545 gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n"); 546 error_code = -EINVAL; 547 } 548 return error_code; 549 } 550 551 #define NUM_MODES 11 552 __s32 ORANGEFS_util_translate_mode(int mode) 553 { 554 int ret = 0; 555 int i = 0; 556 static int modes[NUM_MODES] = { 557 S_IXOTH, S_IWOTH, S_IROTH, 558 S_IXGRP, S_IWGRP, S_IRGRP, 559 S_IXUSR, S_IWUSR, S_IRUSR, 560 S_ISGID, S_ISUID 561 }; 562 static int orangefs_modes[NUM_MODES] = { 563 ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ, 564 ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ, 565 ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ, 566 ORANGEFS_G_SGID, ORANGEFS_U_SUID 567 }; 568 569 for (i = 0; i < NUM_MODES; i++) 570 if (mode & modes[i]) 571 ret |= orangefs_modes[i]; 572 573 return ret; 574 } 575 #undef NUM_MODES 576