1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * (C) 2001 Clemson University and The University of Chicago 4 * 5 * See COPYING in top-level directory. 6 */ 7 #include <linux/kernel.h> 8 #include "protocol.h" 9 #include "orangefs-kernel.h" 10 #include "orangefs-dev-proto.h" 11 #include "orangefs-bufmap.h" 12 13 __s32 fsid_of_op(struct orangefs_kernel_op_s *op) 14 { 15 __s32 fsid = ORANGEFS_FS_ID_NULL; 16 17 if (op) { 18 switch (op->upcall.type) { 19 case ORANGEFS_VFS_OP_FILE_IO: 20 fsid = op->upcall.req.io.refn.fs_id; 21 break; 22 case ORANGEFS_VFS_OP_LOOKUP: 23 fsid = op->upcall.req.lookup.parent_refn.fs_id; 24 break; 25 case ORANGEFS_VFS_OP_CREATE: 26 fsid = op->upcall.req.create.parent_refn.fs_id; 27 break; 28 case ORANGEFS_VFS_OP_GETATTR: 29 fsid = op->upcall.req.getattr.refn.fs_id; 30 break; 31 case ORANGEFS_VFS_OP_REMOVE: 32 fsid = op->upcall.req.remove.parent_refn.fs_id; 33 break; 34 case ORANGEFS_VFS_OP_MKDIR: 35 fsid = op->upcall.req.mkdir.parent_refn.fs_id; 36 break; 37 case ORANGEFS_VFS_OP_READDIR: 38 fsid = op->upcall.req.readdir.refn.fs_id; 39 break; 40 case ORANGEFS_VFS_OP_SETATTR: 41 fsid = op->upcall.req.setattr.refn.fs_id; 42 break; 43 case ORANGEFS_VFS_OP_SYMLINK: 44 fsid = op->upcall.req.sym.parent_refn.fs_id; 45 break; 46 case ORANGEFS_VFS_OP_RENAME: 47 fsid = op->upcall.req.rename.old_parent_refn.fs_id; 48 break; 49 case ORANGEFS_VFS_OP_STATFS: 50 fsid = op->upcall.req.statfs.fs_id; 51 break; 52 case ORANGEFS_VFS_OP_TRUNCATE: 53 fsid = op->upcall.req.truncate.refn.fs_id; 54 break; 55 case ORANGEFS_VFS_OP_RA_FLUSH: 56 fsid = op->upcall.req.ra_cache_flush.refn.fs_id; 57 break; 58 case ORANGEFS_VFS_OP_FS_UMOUNT: 59 fsid = op->upcall.req.fs_umount.fs_id; 60 break; 61 case ORANGEFS_VFS_OP_GETXATTR: 62 fsid = op->upcall.req.getxattr.refn.fs_id; 63 break; 64 case ORANGEFS_VFS_OP_SETXATTR: 65 fsid = op->upcall.req.setxattr.refn.fs_id; 66 break; 67 case ORANGEFS_VFS_OP_LISTXATTR: 68 fsid = op->upcall.req.listxattr.refn.fs_id; 69 break; 70 case ORANGEFS_VFS_OP_REMOVEXATTR: 71 fsid = op->upcall.req.removexattr.refn.fs_id; 72 break; 73 case ORANGEFS_VFS_OP_FSYNC: 74 fsid = op->upcall.req.fsync.refn.fs_id; 75 break; 76 default: 77 break; 78 } 79 } 80 return fsid; 81 } 82 83 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs) 84 { 85 int flags = 0; 86 if (attrs->flags & ORANGEFS_IMMUTABLE_FL) 87 flags |= S_IMMUTABLE; 88 else 89 flags &= ~S_IMMUTABLE; 90 if (attrs->flags & ORANGEFS_APPEND_FL) 91 flags |= S_APPEND; 92 else 93 flags &= ~S_APPEND; 94 if (attrs->flags & ORANGEFS_NOATIME_FL) 95 flags |= S_NOATIME; 96 else 97 flags &= ~S_NOATIME; 98 return flags; 99 } 100 101 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs) 102 { 103 int perm_mode = 0; 104 105 if (attrs->perms & ORANGEFS_O_EXECUTE) 106 perm_mode |= S_IXOTH; 107 if (attrs->perms & ORANGEFS_O_WRITE) 108 perm_mode |= S_IWOTH; 109 if (attrs->perms & ORANGEFS_O_READ) 110 perm_mode |= S_IROTH; 111 112 if (attrs->perms & ORANGEFS_G_EXECUTE) 113 perm_mode |= S_IXGRP; 114 if (attrs->perms & ORANGEFS_G_WRITE) 115 perm_mode |= S_IWGRP; 116 if (attrs->perms & ORANGEFS_G_READ) 117 perm_mode |= S_IRGRP; 118 119 if (attrs->perms & ORANGEFS_U_EXECUTE) 120 perm_mode |= S_IXUSR; 121 if (attrs->perms & ORANGEFS_U_WRITE) 122 perm_mode |= S_IWUSR; 123 if (attrs->perms & ORANGEFS_U_READ) 124 perm_mode |= S_IRUSR; 125 126 if (attrs->perms & ORANGEFS_G_SGID) 127 perm_mode |= S_ISGID; 128 if (attrs->perms & ORANGEFS_U_SUID) 129 perm_mode |= S_ISUID; 130 131 return perm_mode; 132 } 133 134 /* 135 * NOTE: in kernel land, we never use the sys_attr->link_target for 136 * anything, so don't bother copying it into the sys_attr object here. 137 */ 138 static inline int copy_attributes_from_inode(struct inode *inode, 139 struct ORANGEFS_sys_attr_s *attrs, 140 struct iattr *iattr) 141 { 142 umode_t tmp_mode; 143 144 if (!iattr || !inode || !attrs) { 145 gossip_err("NULL iattr (%p), inode (%p), attrs (%p) " 146 "in copy_attributes_from_inode!\n", 147 iattr, 148 inode, 149 attrs); 150 return -EINVAL; 151 } 152 /* 153 * We need to be careful to only copy the attributes out of the 154 * iattr object that we know are valid. 155 */ 156 attrs->mask = 0; 157 if (iattr->ia_valid & ATTR_UID) { 158 attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid); 159 attrs->mask |= ORANGEFS_ATTR_SYS_UID; 160 gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner); 161 } 162 if (iattr->ia_valid & ATTR_GID) { 163 attrs->group = from_kgid(&init_user_ns, iattr->ia_gid); 164 attrs->mask |= ORANGEFS_ATTR_SYS_GID; 165 gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group); 166 } 167 168 if (iattr->ia_valid & ATTR_ATIME) { 169 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME; 170 if (iattr->ia_valid & ATTR_ATIME_SET) { 171 attrs->atime = (time64_t)iattr->ia_atime.tv_sec; 172 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET; 173 } 174 } 175 if (iattr->ia_valid & ATTR_MTIME) { 176 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME; 177 if (iattr->ia_valid & ATTR_MTIME_SET) { 178 attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec; 179 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET; 180 } 181 } 182 if (iattr->ia_valid & ATTR_CTIME) 183 attrs->mask |= ORANGEFS_ATTR_SYS_CTIME; 184 185 /* 186 * ORANGEFS cannot set size with a setattr operation. Probably not likely 187 * to be requested through the VFS, but just in case, don't worry about 188 * ATTR_SIZE 189 */ 190 191 if (iattr->ia_valid & ATTR_MODE) { 192 tmp_mode = iattr->ia_mode; 193 if (tmp_mode & (S_ISVTX)) { 194 if (is_root_handle(inode)) { 195 /* 196 * allow sticky bit to be set on root (since 197 * it shows up that way by default anyhow), 198 * but don't show it to the server 199 */ 200 tmp_mode -= S_ISVTX; 201 } else { 202 gossip_debug(GOSSIP_UTILS_DEBUG, 203 "User attempted to set sticky bit on non-root directory; returning EINVAL.\n"); 204 return -EINVAL; 205 } 206 } 207 208 if (tmp_mode & (S_ISUID)) { 209 gossip_debug(GOSSIP_UTILS_DEBUG, 210 "Attempting to set setuid bit (not supported); returning EINVAL.\n"); 211 return -EINVAL; 212 } 213 214 attrs->perms = ORANGEFS_util_translate_mode(tmp_mode); 215 attrs->mask |= ORANGEFS_ATTR_SYS_PERM; 216 } 217 218 return 0; 219 } 220 221 static int orangefs_inode_type(enum orangefs_ds_type objtype) 222 { 223 if (objtype == ORANGEFS_TYPE_METAFILE) 224 return S_IFREG; 225 else if (objtype == ORANGEFS_TYPE_DIRECTORY) 226 return S_IFDIR; 227 else if (objtype == ORANGEFS_TYPE_SYMLINK) 228 return S_IFLNK; 229 else 230 return -1; 231 } 232 233 static int orangefs_inode_is_stale(struct inode *inode, int new, 234 struct ORANGEFS_sys_attr_s *attrs, char *link_target) 235 { 236 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 237 int type = orangefs_inode_type(attrs->objtype); 238 if (!new) { 239 /* 240 * If the inode type or symlink target have changed then this 241 * inode is stale. 242 */ 243 if (type == -1 || !(inode->i_mode & type)) { 244 orangefs_make_bad_inode(inode); 245 return 1; 246 } 247 if (type == S_IFLNK && strncmp(orangefs_inode->link_target, 248 link_target, ORANGEFS_NAME_MAX)) { 249 orangefs_make_bad_inode(inode); 250 return 1; 251 } 252 } 253 return 0; 254 } 255 256 int orangefs_inode_getattr(struct inode *inode, int new, int bypass, 257 u32 request_mask) 258 { 259 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 260 struct orangefs_kernel_op_s *new_op; 261 loff_t inode_size, rounded_up_size; 262 int ret, type; 263 264 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__, 265 get_khandle_from_ino(inode)); 266 267 if (!new && !bypass) { 268 /* 269 * Must have all the attributes in the mask and be within cache 270 * time. 271 */ 272 if ((request_mask & orangefs_inode->getattr_mask) == 273 request_mask && 274 time_before(jiffies, orangefs_inode->getattr_time)) 275 return 0; 276 } 277 278 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); 279 if (!new_op) 280 return -ENOMEM; 281 new_op->upcall.req.getattr.refn = orangefs_inode->refn; 282 /* 283 * Size is the hardest attribute to get. The incremental cost of any 284 * other attribute is essentially zero. 285 */ 286 if (request_mask & STATX_SIZE || new) 287 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT; 288 else 289 new_op->upcall.req.getattr.mask = 290 ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE; 291 292 ret = service_operation(new_op, __func__, 293 get_interruptible_flag(inode)); 294 if (ret != 0) 295 goto out; 296 297 type = orangefs_inode_type(new_op-> 298 downcall.resp.getattr.attributes.objtype); 299 ret = orangefs_inode_is_stale(inode, new, 300 &new_op->downcall.resp.getattr.attributes, 301 new_op->downcall.resp.getattr.link_target); 302 if (ret) { 303 ret = -ESTALE; 304 goto out; 305 } 306 307 switch (type) { 308 case S_IFREG: 309 inode->i_flags = orangefs_inode_flags(&new_op-> 310 downcall.resp.getattr.attributes); 311 if (request_mask & STATX_SIZE || new) { 312 inode_size = (loff_t)new_op-> 313 downcall.resp.getattr.attributes.size; 314 rounded_up_size = 315 (inode_size + (4096 - (inode_size % 4096))); 316 inode->i_size = inode_size; 317 orangefs_inode->blksize = 318 new_op->downcall.resp.getattr.attributes.blksize; 319 spin_lock(&inode->i_lock); 320 inode->i_bytes = inode_size; 321 inode->i_blocks = 322 (unsigned long)(rounded_up_size / 512); 323 spin_unlock(&inode->i_lock); 324 } 325 break; 326 case S_IFDIR: 327 if (request_mask & STATX_SIZE || new) { 328 inode->i_size = PAGE_SIZE; 329 orangefs_inode->blksize = i_blocksize(inode); 330 spin_lock(&inode->i_lock); 331 inode_set_bytes(inode, inode->i_size); 332 spin_unlock(&inode->i_lock); 333 } 334 set_nlink(inode, 1); 335 break; 336 case S_IFLNK: 337 if (new) { 338 inode->i_size = (loff_t)strlen(new_op-> 339 downcall.resp.getattr.link_target); 340 orangefs_inode->blksize = i_blocksize(inode); 341 ret = strscpy(orangefs_inode->link_target, 342 new_op->downcall.resp.getattr.link_target, 343 ORANGEFS_NAME_MAX); 344 if (ret == -E2BIG) { 345 ret = -EIO; 346 goto out; 347 } 348 inode->i_link = orangefs_inode->link_target; 349 } 350 break; 351 } 352 353 inode->i_uid = make_kuid(&init_user_ns, new_op-> 354 downcall.resp.getattr.attributes.owner); 355 inode->i_gid = make_kgid(&init_user_ns, new_op-> 356 downcall.resp.getattr.attributes.group); 357 inode->i_atime.tv_sec = (time64_t)new_op-> 358 downcall.resp.getattr.attributes.atime; 359 inode->i_mtime.tv_sec = (time64_t)new_op-> 360 downcall.resp.getattr.attributes.mtime; 361 inode->i_ctime.tv_sec = (time64_t)new_op-> 362 downcall.resp.getattr.attributes.ctime; 363 inode->i_atime.tv_nsec = 0; 364 inode->i_mtime.tv_nsec = 0; 365 inode->i_ctime.tv_nsec = 0; 366 367 /* special case: mark the root inode as sticky */ 368 inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) | 369 orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes); 370 371 orangefs_inode->getattr_time = jiffies + 372 orangefs_getattr_timeout_msecs*HZ/1000; 373 if (request_mask & STATX_SIZE || new) 374 orangefs_inode->getattr_mask = STATX_BASIC_STATS; 375 else 376 orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE; 377 ret = 0; 378 out: 379 op_release(new_op); 380 return ret; 381 } 382 383 int orangefs_inode_check_changed(struct inode *inode) 384 { 385 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 386 struct orangefs_kernel_op_s *new_op; 387 int ret; 388 389 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__, 390 get_khandle_from_ino(inode)); 391 392 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); 393 if (!new_op) 394 return -ENOMEM; 395 new_op->upcall.req.getattr.refn = orangefs_inode->refn; 396 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE | 397 ORANGEFS_ATTR_SYS_LNK_TARGET; 398 399 ret = service_operation(new_op, __func__, 400 get_interruptible_flag(inode)); 401 if (ret != 0) 402 goto out; 403 404 ret = orangefs_inode_is_stale(inode, 0, 405 &new_op->downcall.resp.getattr.attributes, 406 new_op->downcall.resp.getattr.link_target); 407 out: 408 op_release(new_op); 409 return ret; 410 } 411 412 /* 413 * issues a orangefs setattr request to make sure the new attribute values 414 * take effect if successful. returns 0 on success; -errno otherwise 415 */ 416 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr) 417 { 418 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 419 struct orangefs_kernel_op_s *new_op; 420 int ret; 421 422 new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR); 423 if (!new_op) 424 return -ENOMEM; 425 426 new_op->upcall.req.setattr.refn = orangefs_inode->refn; 427 ret = copy_attributes_from_inode(inode, 428 &new_op->upcall.req.setattr.attributes, 429 iattr); 430 if (ret >= 0) { 431 ret = service_operation(new_op, __func__, 432 get_interruptible_flag(inode)); 433 434 gossip_debug(GOSSIP_UTILS_DEBUG, 435 "orangefs_inode_setattr: returning %d\n", 436 ret); 437 } 438 439 op_release(new_op); 440 441 if (ret == 0) 442 orangefs_inode->getattr_time = jiffies - 1; 443 444 return ret; 445 } 446 447 void orangefs_make_bad_inode(struct inode *inode) 448 { 449 if (is_root_handle(inode)) { 450 /* 451 * if this occurs, the pvfs2-client-core was killed but we 452 * can't afford to lose the inode operations and such 453 * associated with the root handle in any case. 454 */ 455 gossip_debug(GOSSIP_UTILS_DEBUG, 456 "*** NOT making bad root inode %pU\n", 457 get_khandle_from_ino(inode)); 458 } else { 459 gossip_debug(GOSSIP_UTILS_DEBUG, 460 "*** making bad inode %pU\n", 461 get_khandle_from_ino(inode)); 462 make_bad_inode(inode); 463 } 464 } 465 466 /* 467 * The following is a very dirty hack that is now a permanent part of the 468 * ORANGEFS protocol. See protocol.h for more error definitions. 469 */ 470 471 /* The order matches include/orangefs-types.h in the OrangeFS source. */ 472 static int PINT_errno_mapping[] = { 473 0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM, 474 EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE, 475 EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG, 476 ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH, 477 EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM, 478 EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE, 479 ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE, 480 EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS, 481 ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY, 482 EACCES, ECONNRESET, ERANGE 483 }; 484 485 int orangefs_normalize_to_errno(__s32 error_code) 486 { 487 __u32 i; 488 489 /* Success */ 490 if (error_code == 0) { 491 return 0; 492 /* 493 * This shouldn't ever happen. If it does it should be fixed on the 494 * server. 495 */ 496 } else if (error_code > 0) { 497 gossip_err("orangefs: error status receieved.\n"); 498 gossip_err("orangefs: assuming error code is inverted.\n"); 499 error_code = -error_code; 500 } 501 502 /* 503 * XXX: This is very bad since error codes from ORANGEFS may not be 504 * suitable for return into userspace. 505 */ 506 507 /* 508 * Convert ORANGEFS error values into errno values suitable for return 509 * from the kernel. 510 */ 511 if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) { 512 if (((-error_code) & 513 (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT| 514 ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) { 515 /* 516 * cancellation error codes generally correspond to 517 * a timeout from the client's perspective 518 */ 519 error_code = -ETIMEDOUT; 520 } else { 521 /* assume a default error code */ 522 gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code); 523 error_code = -EINVAL; 524 } 525 526 /* Convert ORANGEFS encoded errno values into regular errno values. */ 527 } else if ((-error_code) & ORANGEFS_ERROR_BIT) { 528 i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS); 529 if (i < ARRAY_SIZE(PINT_errno_mapping)) 530 error_code = -PINT_errno_mapping[i]; 531 else 532 error_code = -EINVAL; 533 534 /* 535 * Only ORANGEFS protocol error codes should ever come here. Otherwise 536 * there is a bug somewhere. 537 */ 538 } else { 539 gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n"); 540 } 541 return error_code; 542 } 543 544 #define NUM_MODES 11 545 __s32 ORANGEFS_util_translate_mode(int mode) 546 { 547 int ret = 0; 548 int i = 0; 549 static int modes[NUM_MODES] = { 550 S_IXOTH, S_IWOTH, S_IROTH, 551 S_IXGRP, S_IWGRP, S_IRGRP, 552 S_IXUSR, S_IWUSR, S_IRUSR, 553 S_ISGID, S_ISUID 554 }; 555 static int orangefs_modes[NUM_MODES] = { 556 ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ, 557 ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ, 558 ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ, 559 ORANGEFS_G_SGID, ORANGEFS_U_SUID 560 }; 561 562 for (i = 0; i < NUM_MODES; i++) 563 if (mode & modes[i]) 564 ret |= orangefs_modes[i]; 565 566 return ret; 567 } 568 #undef NUM_MODES 569