1 /* 2 * File operations used by nfsd. Some of these have been ripped from 3 * other parts of the kernel because they weren't exported, others 4 * are partial duplicates with added or changed functionality. 5 * 6 * Note that several functions dget() the dentry upon which they want 7 * to act, most notably those that create directory entries. Response 8 * dentry's are dput()'d if necessary in the release callback. 9 * So if you notice code paths that apparently fail to dput() the 10 * dentry, don't worry--they have been taken care of. 11 * 12 * Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de> 13 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp> 14 */ 15 16 #include <linux/fs.h> 17 #include <linux/file.h> 18 #include <linux/splice.h> 19 #include <linux/fcntl.h> 20 #include <linux/namei.h> 21 #include <linux/delay.h> 22 #include <linux/fsnotify.h> 23 #include <linux/posix_acl_xattr.h> 24 #include <linux/xattr.h> 25 #include <linux/jhash.h> 26 #include <linux/ima.h> 27 #include <linux/slab.h> 28 #include <asm/uaccess.h> 29 #include <linux/exportfs.h> 30 #include <linux/writeback.h> 31 32 #ifdef CONFIG_NFSD_V3 33 #include "xdr3.h" 34 #endif /* CONFIG_NFSD_V3 */ 35 36 #ifdef CONFIG_NFSD_V4 37 #include "acl.h" 38 #include "idmap.h" 39 #endif /* CONFIG_NFSD_V4 */ 40 41 #include "nfsd.h" 42 #include "vfs.h" 43 44 #define NFSDDBG_FACILITY NFSDDBG_FILEOP 45 46 47 /* 48 * This is a cache of readahead params that help us choose the proper 49 * readahead strategy. Initially, we set all readahead parameters to 0 50 * and let the VFS handle things. 51 * If you increase the number of cached files very much, you'll need to 52 * add a hash table here. 
53 */ 54 struct raparms { 55 struct raparms *p_next; 56 unsigned int p_count; 57 ino_t p_ino; 58 dev_t p_dev; 59 int p_set; 60 struct file_ra_state p_ra; 61 unsigned int p_hindex; 62 }; 63 64 struct raparm_hbucket { 65 struct raparms *pb_head; 66 spinlock_t pb_lock; 67 } ____cacheline_aligned_in_smp; 68 69 #define RAPARM_HASH_BITS 4 70 #define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) 71 #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) 72 static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; 73 74 /* 75 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 76 * a mount point. 77 * Returns -EAGAIN or -ETIMEDOUT leaving *dpp and *expp unchanged, 78 * or nfs_ok having possibly changed *dpp and *expp 79 */ 80 int 81 nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, 82 struct svc_export **expp) 83 { 84 struct svc_export *exp = *expp, *exp2 = NULL; 85 struct dentry *dentry = *dpp; 86 struct path path = {.mnt = mntget(exp->ex_path.mnt), 87 .dentry = dget(dentry)}; 88 int err = 0; 89 90 err = follow_down(&path); 91 if (err < 0) 92 goto out; 93 94 exp2 = rqst_exp_get_by_name(rqstp, &path); 95 if (IS_ERR(exp2)) { 96 err = PTR_ERR(exp2); 97 /* 98 * We normally allow NFS clients to continue 99 * "underneath" a mountpoint that is not exported. 100 * The exception is V4ROOT, where no traversal is ever 101 * allowed without an explicit export of the new 102 * directory. 103 */ 104 if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT)) 105 err = 0; 106 path_put(&path); 107 goto out; 108 } 109 if (nfsd_v4client(rqstp) || 110 (exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { 111 /* successfully crossed mount point */ 112 /* 113 * This is subtle: path.dentry is *not* on path.mnt 114 * at this point. 
The only reason we are safe is that 115 * original mnt is pinned down by exp, so we should 116 * put path *before* putting exp 117 */ 118 *dpp = path.dentry; 119 path.dentry = dentry; 120 *expp = exp2; 121 exp2 = exp; 122 } 123 path_put(&path); 124 exp_put(exp2); 125 out: 126 return err; 127 } 128 129 static void follow_to_parent(struct path *path) 130 { 131 struct dentry *dp; 132 133 while (path->dentry == path->mnt->mnt_root && follow_up(path)) 134 ; 135 dp = dget_parent(path->dentry); 136 dput(path->dentry); 137 path->dentry = dp; 138 } 139 140 static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp) 141 { 142 struct svc_export *exp2; 143 struct path path = {.mnt = mntget((*exp)->ex_path.mnt), 144 .dentry = dget(dparent)}; 145 146 follow_to_parent(&path); 147 148 exp2 = rqst_exp_parent(rqstp, &path); 149 if (PTR_ERR(exp2) == -ENOENT) { 150 *dentryp = dget(dparent); 151 } else if (IS_ERR(exp2)) { 152 path_put(&path); 153 return PTR_ERR(exp2); 154 } else { 155 *dentryp = dget(path.dentry); 156 exp_put(*exp); 157 *exp = exp2; 158 } 159 path_put(&path); 160 return 0; 161 } 162 163 /* 164 * For nfsd purposes, we treat V4ROOT exports as though there was an 165 * export at *every* directory. 
166 */ 167 int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp) 168 { 169 if (d_mountpoint(dentry)) 170 return 1; 171 if (!(exp->ex_flags & NFSEXP_V4ROOT)) 172 return 0; 173 return dentry->d_inode != NULL; 174 } 175 176 __be32 177 nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, 178 const char *name, unsigned int len, 179 struct svc_export **exp_ret, struct dentry **dentry_ret) 180 { 181 struct svc_export *exp; 182 struct dentry *dparent; 183 struct dentry *dentry; 184 int host_err; 185 186 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); 187 188 dparent = fhp->fh_dentry; 189 exp = fhp->fh_export; 190 exp_get(exp); 191 192 /* Lookup the name, but don't follow links */ 193 if (isdotent(name, len)) { 194 if (len==1) 195 dentry = dget(dparent); 196 else if (dparent != exp->ex_path.dentry) 197 dentry = dget_parent(dparent); 198 else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp)) 199 dentry = dget(dparent); /* .. == . just like at / */ 200 else { 201 /* checking mountpoint crossing is very different when stepping up */ 202 host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry); 203 if (host_err) 204 goto out_nfserr; 205 } 206 } else { 207 fh_lock(fhp); 208 dentry = lookup_one_len(name, dparent, len); 209 host_err = PTR_ERR(dentry); 210 if (IS_ERR(dentry)) 211 goto out_nfserr; 212 /* 213 * check if we have crossed a mount point ... 214 */ 215 if (nfsd_mountpoint(dentry, exp)) { 216 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { 217 dput(dentry); 218 goto out_nfserr; 219 } 220 } 221 } 222 *dentry_ret = dentry; 223 *exp_ret = exp; 224 return 0; 225 226 out_nfserr: 227 exp_put(exp); 228 return nfserrno(host_err); 229 } 230 231 /* 232 * Look up one component of a pathname. 233 * N.B. 
After this call _both_ fhp and resfh need an fh_put 234 * 235 * If the lookup would cross a mountpoint, and the mounted filesystem 236 * is exported to the client with NFSEXP_NOHIDE, then the lookup is 237 * accepted as it stands and the mounted directory is 238 * returned. Otherwise the covered directory is returned. 239 * NOTE: this mountpoint crossing is not supported properly by all 240 * clients and is explicitly disallowed for NFSv3 241 * NeilBrown <neilb@cse.unsw.edu.au> 242 */ 243 __be32 244 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, 245 unsigned int len, struct svc_fh *resfh) 246 { 247 struct svc_export *exp; 248 struct dentry *dentry; 249 __be32 err; 250 251 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); 252 if (err) 253 return err; 254 err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); 255 if (err) 256 return err; 257 err = check_nfsd_access(exp, rqstp); 258 if (err) 259 goto out; 260 /* 261 * Note: we compose the file handle now, but as the 262 * dentry may be negative, it may need to be updated. 263 */ 264 err = fh_compose(resfh, exp, dentry, fhp); 265 if (!err && !dentry->d_inode) 266 err = nfserr_noent; 267 out: 268 dput(dentry); 269 exp_put(exp); 270 return err; 271 } 272 273 static int nfsd_break_lease(struct inode *inode) 274 { 275 if (!S_ISREG(inode->i_mode)) 276 return 0; 277 return break_lease(inode, O_WRONLY | O_NONBLOCK); 278 } 279 280 /* 281 * Commit metadata changes to stable storage. 282 */ 283 static int 284 commit_metadata(struct svc_fh *fhp) 285 { 286 struct inode *inode = fhp->fh_dentry->d_inode; 287 const struct export_operations *export_ops = inode->i_sb->s_export_op; 288 289 if (!EX_ISSYNC(fhp->fh_export)) 290 return 0; 291 292 if (export_ops->commit_metadata) 293 return export_ops->commit_metadata(inode); 294 return sync_inode_metadata(inode, 1); 295 } 296 297 /* 298 * Set various file attributes. 299 * N.B. 
After this call fhp needs an fh_put 300 */ 301 __be32 302 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, 303 int check_guard, time_t guardtime) 304 { 305 struct dentry *dentry; 306 struct inode *inode; 307 int accmode = NFSD_MAY_SATTR; 308 int ftype = 0; 309 __be32 err; 310 int host_err; 311 int size_change = 0; 312 313 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) 314 accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; 315 if (iap->ia_valid & ATTR_SIZE) 316 ftype = S_IFREG; 317 318 /* Get inode */ 319 err = fh_verify(rqstp, fhp, ftype, accmode); 320 if (err) 321 goto out; 322 323 dentry = fhp->fh_dentry; 324 inode = dentry->d_inode; 325 326 /* Ignore any mode updates on symlinks */ 327 if (S_ISLNK(inode->i_mode)) 328 iap->ia_valid &= ~ATTR_MODE; 329 330 if (!iap->ia_valid) 331 goto out; 332 333 /* 334 * NFSv2 does not differentiate between "set-[ac]time-to-now" 335 * which only requires access, and "set-[ac]time-to-X" which 336 * requires ownership. 337 * So if it looks like it might be "set both to the same time which 338 * is close to now", and if inode_change_ok fails, then we 339 * convert to "set to now" instead of "set to explicit time" 340 * 341 * We only call inode_change_ok as the last test as technically 342 * it is not an interface that we should be using. It is only 343 * valid if the filesystem does not define it's own i_op->setattr. 344 */ 345 #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) 346 #define MAX_TOUCH_TIME_ERROR (30*60) 347 if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET && 348 iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) { 349 /* 350 * Looks probable. 351 * 352 * Now just make sure time is in the right ballpark. 353 * Solaris, at least, doesn't seem to care what the time 354 * request is. We require it be within 30 minutes of now. 
355 */ 356 time_t delta = iap->ia_atime.tv_sec - get_seconds(); 357 if (delta < 0) 358 delta = -delta; 359 if (delta < MAX_TOUCH_TIME_ERROR && 360 inode_change_ok(inode, iap) != 0) { 361 /* 362 * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME. 363 * This will cause notify_change to set these times 364 * to "now" 365 */ 366 iap->ia_valid &= ~BOTH_TIME_SET; 367 } 368 } 369 370 /* 371 * The size case is special. 372 * It changes the file as well as the attributes. 373 */ 374 if (iap->ia_valid & ATTR_SIZE) { 375 if (iap->ia_size < inode->i_size) { 376 err = nfsd_permission(rqstp, fhp->fh_export, dentry, 377 NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE); 378 if (err) 379 goto out; 380 } 381 382 host_err = get_write_access(inode); 383 if (host_err) 384 goto out_nfserr; 385 386 size_change = 1; 387 host_err = locks_verify_truncate(inode, NULL, iap->ia_size); 388 if (host_err) { 389 put_write_access(inode); 390 goto out_nfserr; 391 } 392 } 393 394 /* sanitize the mode change */ 395 if (iap->ia_valid & ATTR_MODE) { 396 iap->ia_mode &= S_IALLUGO; 397 iap->ia_mode |= (inode->i_mode & ~S_IALLUGO); 398 } 399 400 /* Revoke setuid/setgid on chown */ 401 if (!S_ISDIR(inode->i_mode) && 402 (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || 403 ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) { 404 iap->ia_valid |= ATTR_KILL_PRIV; 405 if (iap->ia_valid & ATTR_MODE) { 406 /* we're setting mode too, just clear the s*id bits */ 407 iap->ia_mode &= ~S_ISUID; 408 if (iap->ia_mode & S_IXGRP) 409 iap->ia_mode &= ~S_ISGID; 410 } else { 411 /* set ATTR_KILL_* bits and let VFS handle it */ 412 iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); 413 } 414 } 415 416 /* Change the attributes. 
*/ 417 418 iap->ia_valid |= ATTR_CTIME; 419 420 err = nfserr_notsync; 421 if (!check_guard || guardtime == inode->i_ctime.tv_sec) { 422 host_err = nfsd_break_lease(inode); 423 if (host_err) 424 goto out_nfserr; 425 fh_lock(fhp); 426 427 host_err = notify_change(dentry, iap); 428 err = nfserrno(host_err); 429 fh_unlock(fhp); 430 } 431 if (size_change) 432 put_write_access(inode); 433 if (!err) 434 commit_metadata(fhp); 435 out: 436 return err; 437 438 out_nfserr: 439 err = nfserrno(host_err); 440 goto out; 441 } 442 443 #if defined(CONFIG_NFSD_V2_ACL) || \ 444 defined(CONFIG_NFSD_V3_ACL) || \ 445 defined(CONFIG_NFSD_V4) 446 static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) 447 { 448 ssize_t buflen; 449 ssize_t ret; 450 451 buflen = vfs_getxattr(dentry, key, NULL, 0); 452 if (buflen <= 0) 453 return buflen; 454 455 *buf = kmalloc(buflen, GFP_KERNEL); 456 if (!*buf) 457 return -ENOMEM; 458 459 ret = vfs_getxattr(dentry, key, *buf, buflen); 460 if (ret < 0) 461 kfree(*buf); 462 return ret; 463 } 464 #endif 465 466 #if defined(CONFIG_NFSD_V4) 467 static int 468 set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) 469 { 470 int len; 471 size_t buflen; 472 char *buf = NULL; 473 int error = 0; 474 475 buflen = posix_acl_xattr_size(pacl->a_count); 476 buf = kmalloc(buflen, GFP_KERNEL); 477 error = -ENOMEM; 478 if (buf == NULL) 479 goto out; 480 481 len = posix_acl_to_xattr(pacl, buf, buflen); 482 if (len < 0) { 483 error = len; 484 goto out; 485 } 486 487 error = vfs_setxattr(dentry, key, buf, len, 0); 488 out: 489 kfree(buf); 490 return error; 491 } 492 493 __be32 494 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, 495 struct nfs4_acl *acl) 496 { 497 __be32 error; 498 int host_error; 499 struct dentry *dentry; 500 struct inode *inode; 501 struct posix_acl *pacl = NULL, *dpacl = NULL; 502 unsigned int flags = 0; 503 504 /* Get inode */ 505 error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); 506 if 
(error) 507 return error; 508 509 dentry = fhp->fh_dentry; 510 inode = dentry->d_inode; 511 if (S_ISDIR(inode->i_mode)) 512 flags = NFS4_ACL_DIR; 513 514 host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); 515 if (host_error == -EINVAL) { 516 return nfserr_attrnotsupp; 517 } else if (host_error < 0) 518 goto out_nfserr; 519 520 host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); 521 if (host_error < 0) 522 goto out_release; 523 524 if (S_ISDIR(inode->i_mode)) 525 host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); 526 527 out_release: 528 posix_acl_release(pacl); 529 posix_acl_release(dpacl); 530 out_nfserr: 531 if (host_error == -EOPNOTSUPP) 532 return nfserr_attrnotsupp; 533 else 534 return nfserrno(host_error); 535 } 536 537 static struct posix_acl * 538 _get_posix_acl(struct dentry *dentry, char *key) 539 { 540 void *buf = NULL; 541 struct posix_acl *pacl = NULL; 542 int buflen; 543 544 buflen = nfsd_getxattr(dentry, key, &buf); 545 if (!buflen) 546 buflen = -ENODATA; 547 if (buflen <= 0) 548 return ERR_PTR(buflen); 549 550 pacl = posix_acl_from_xattr(buf, buflen); 551 kfree(buf); 552 return pacl; 553 } 554 555 int 556 nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl) 557 { 558 struct inode *inode = dentry->d_inode; 559 int error = 0; 560 struct posix_acl *pacl = NULL, *dpacl = NULL; 561 unsigned int flags = 0; 562 563 pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS); 564 if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) 565 pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); 566 if (IS_ERR(pacl)) { 567 error = PTR_ERR(pacl); 568 pacl = NULL; 569 goto out; 570 } 571 572 if (S_ISDIR(inode->i_mode)) { 573 dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT); 574 if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) 575 dpacl = NULL; 576 else if (IS_ERR(dpacl)) { 577 error = PTR_ERR(dpacl); 578 dpacl = NULL; 579 goto out; 580 } 581 flags = NFS4_ACL_DIR; 582 } 583 584 *acl 
= nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags); 585 if (IS_ERR(*acl)) { 586 error = PTR_ERR(*acl); 587 *acl = NULL; 588 } 589 out: 590 posix_acl_release(pacl); 591 posix_acl_release(dpacl); 592 return error; 593 } 594 595 #endif /* defined(CONFIG_NFSD_V4) */ 596 597 #ifdef CONFIG_NFSD_V3 598 /* 599 * Check server access rights to a file system object 600 */ 601 struct accessmap { 602 u32 access; 603 int how; 604 }; 605 static struct accessmap nfs3_regaccess[] = { 606 { NFS3_ACCESS_READ, NFSD_MAY_READ }, 607 { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC }, 608 { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC }, 609 { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE }, 610 611 { 0, 0 } 612 }; 613 614 static struct accessmap nfs3_diraccess[] = { 615 { NFS3_ACCESS_READ, NFSD_MAY_READ }, 616 { NFS3_ACCESS_LOOKUP, NFSD_MAY_EXEC }, 617 { NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC}, 618 { NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE }, 619 { NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE }, 620 621 { 0, 0 } 622 }; 623 624 static struct accessmap nfs3_anyaccess[] = { 625 /* Some clients - Solaris 2.6 at least, make an access call 626 * to the server to check for access for things like /dev/null 627 * (which really, the server doesn't care about). 
So 628 * We provide simple access checking for them, looking 629 * mainly at mode bits, and we make sure to ignore read-only 630 * filesystem checks 631 */ 632 { NFS3_ACCESS_READ, NFSD_MAY_READ }, 633 { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC }, 634 { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS }, 635 { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS }, 636 637 { 0, 0 } 638 }; 639 640 __be32 641 nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported) 642 { 643 struct accessmap *map; 644 struct svc_export *export; 645 struct dentry *dentry; 646 u32 query, result = 0, sresult = 0; 647 __be32 error; 648 649 error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); 650 if (error) 651 goto out; 652 653 export = fhp->fh_export; 654 dentry = fhp->fh_dentry; 655 656 if (S_ISREG(dentry->d_inode->i_mode)) 657 map = nfs3_regaccess; 658 else if (S_ISDIR(dentry->d_inode->i_mode)) 659 map = nfs3_diraccess; 660 else 661 map = nfs3_anyaccess; 662 663 664 query = *access; 665 for (; map->access; map++) { 666 if (map->access & query) { 667 __be32 err2; 668 669 sresult |= map->access; 670 671 err2 = nfsd_permission(rqstp, export, dentry, map->how); 672 switch (err2) { 673 case nfs_ok: 674 result |= map->access; 675 break; 676 677 /* the following error codes just mean the access was not allowed, 678 * rather than an error occurred */ 679 case nfserr_rofs: 680 case nfserr_acces: 681 case nfserr_perm: 682 /* simply don't "or" in the access bit. */ 683 break; 684 default: 685 error = err2; 686 goto out; 687 } 688 } 689 } 690 *access = result; 691 if (supported) 692 *supported = sresult; 693 694 out: 695 return error; 696 } 697 #endif /* CONFIG_NFSD_V3 */ 698 699 700 701 /* 702 * Open an existing file or directory. 703 * The access argument indicates the type of open (read/write/lock) 704 * N.B. 
After this call fhp needs an fh_put 705 */ 706 __be32 707 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, 708 int access, struct file **filp) 709 { 710 struct dentry *dentry; 711 struct inode *inode; 712 int flags = O_RDONLY|O_LARGEFILE; 713 __be32 err; 714 int host_err = 0; 715 716 validate_process_creds(); 717 718 /* 719 * If we get here, then the client has already done an "open", 720 * and (hopefully) checked permission - so allow OWNER_OVERRIDE 721 * in case a chmod has now revoked permission. 722 */ 723 err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE); 724 if (err) 725 goto out; 726 727 dentry = fhp->fh_dentry; 728 inode = dentry->d_inode; 729 730 /* Disallow write access to files with the append-only bit set 731 * or any access when mandatory locking enabled 732 */ 733 err = nfserr_perm; 734 if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE)) 735 goto out; 736 /* 737 * We must ignore files (but only files) which might have mandatory 738 * locks on them because there is no way to know if the accesser has 739 * the lock. 740 */ 741 if (S_ISREG((inode)->i_mode) && mandatory_lock(inode)) 742 goto out; 743 744 if (!inode->i_fop) 745 goto out; 746 747 /* 748 * Check to see if there are any leases on this file. 749 * This may block while leases are broken. 750 */ 751 if (!(access & NFSD_MAY_NOT_BREAK_LEASE)) 752 host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? 
O_WRONLY : 0)); 753 if (host_err) /* NOMEM or WOULDBLOCK */ 754 goto out_nfserr; 755 756 if (access & NFSD_MAY_WRITE) { 757 if (access & NFSD_MAY_READ) 758 flags = O_RDWR|O_LARGEFILE; 759 else 760 flags = O_WRONLY|O_LARGEFILE; 761 } 762 *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), 763 flags, current_cred()); 764 if (IS_ERR(*filp)) 765 host_err = PTR_ERR(*filp); 766 else 767 host_err = ima_file_check(*filp, access); 768 out_nfserr: 769 err = nfserrno(host_err); 770 out: 771 validate_process_creds(); 772 return err; 773 } 774 775 /* 776 * Close a file. 777 */ 778 void 779 nfsd_close(struct file *filp) 780 { 781 fput(filp); 782 } 783 784 /* 785 * Obtain the readahead parameters for the file 786 * specified by (dev, ino). 787 */ 788 789 static inline struct raparms * 790 nfsd_get_raparms(dev_t dev, ino_t ino) 791 { 792 struct raparms *ra, **rap, **frap = NULL; 793 int depth = 0; 794 unsigned int hash; 795 struct raparm_hbucket *rab; 796 797 hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK; 798 rab = &raparm_hash[hash]; 799 800 spin_lock(&rab->pb_lock); 801 for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) { 802 if (ra->p_ino == ino && ra->p_dev == dev) 803 goto found; 804 depth++; 805 if (ra->p_count == 0) 806 frap = rap; 807 } 808 depth = nfsdstats.ra_size; 809 if (!frap) { 810 spin_unlock(&rab->pb_lock); 811 return NULL; 812 } 813 rap = frap; 814 ra = *frap; 815 ra->p_dev = dev; 816 ra->p_ino = ino; 817 ra->p_set = 0; 818 ra->p_hindex = hash; 819 found: 820 if (rap != &rab->pb_head) { 821 *rap = ra->p_next; 822 ra->p_next = rab->pb_head; 823 rab->pb_head = ra; 824 } 825 ra->p_count++; 826 nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; 827 spin_unlock(&rab->pb_lock); 828 return ra; 829 } 830 831 /* 832 * Grab and keep cached pages associated with a file in the svc_rqst 833 * so that they can be passed to the network sendmsg/sendpage routines 834 * directly. They will be released after the sending has completed. 
835 */ 836 static int 837 nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 838 struct splice_desc *sd) 839 { 840 struct svc_rqst *rqstp = sd->u.data; 841 struct page **pp = rqstp->rq_respages + rqstp->rq_resused; 842 struct page *page = buf->page; 843 size_t size; 844 845 size = sd->len; 846 847 if (rqstp->rq_res.page_len == 0) { 848 get_page(page); 849 put_page(*pp); 850 *pp = page; 851 rqstp->rq_resused++; 852 rqstp->rq_res.page_base = buf->offset; 853 rqstp->rq_res.page_len = size; 854 } else if (page != pp[-1]) { 855 get_page(page); 856 if (*pp) 857 put_page(*pp); 858 *pp = page; 859 rqstp->rq_resused++; 860 rqstp->rq_res.page_len += size; 861 } else 862 rqstp->rq_res.page_len += size; 863 864 return size; 865 } 866 867 static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, 868 struct splice_desc *sd) 869 { 870 return __splice_from_pipe(pipe, sd, nfsd_splice_actor); 871 } 872 873 static __be32 874 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 875 loff_t offset, struct kvec *vec, int vlen, unsigned long *count) 876 { 877 mm_segment_t oldfs; 878 __be32 err; 879 int host_err; 880 881 err = nfserr_perm; 882 883 if (file->f_op->splice_read && rqstp->rq_splice_ok) { 884 struct splice_desc sd = { 885 .len = 0, 886 .total_len = *count, 887 .pos = offset, 888 .u.data = rqstp, 889 }; 890 891 rqstp->rq_resused = 1; 892 host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); 893 } else { 894 oldfs = get_fs(); 895 set_fs(KERNEL_DS); 896 host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); 897 set_fs(oldfs); 898 } 899 900 if (host_err >= 0) { 901 nfsdstats.io_read += host_err; 902 *count = host_err; 903 err = 0; 904 fsnotify_access(file); 905 } else 906 err = nfserrno(host_err); 907 return err; 908 } 909 910 static void kill_suid(struct dentry *dentry) 911 { 912 struct iattr ia; 913 ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; 914 915 
mutex_lock(&dentry->d_inode->i_mutex); 916 notify_change(dentry, &ia); 917 mutex_unlock(&dentry->d_inode->i_mutex); 918 } 919 920 /* 921 * Gathered writes: If another process is currently writing to the file, 922 * there's a high chance this is another nfsd (triggered by a bulk write 923 * from a client's biod). Rather than syncing the file with each write 924 * request, we sleep for 10 msec. 925 * 926 * I don't know if this roughly approximates C. Juszak's idea of 927 * gathered writes, but it's a nice and simple solution (IMHO), and it 928 * seems to work:-) 929 * 930 * Note: we do this only in the NFSv2 case, since v3 and higher have a 931 * better tool (separate unstable writes and commits) for solving this 932 * problem. 933 */ 934 static int wait_for_concurrent_writes(struct file *file) 935 { 936 struct inode *inode = file->f_path.dentry->d_inode; 937 static ino_t last_ino; 938 static dev_t last_dev; 939 int err = 0; 940 941 if (atomic_read(&inode->i_writecount) > 1 942 || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { 943 dprintk("nfsd: write defer %d\n", task_pid_nr(current)); 944 msleep(10); 945 dprintk("nfsd: write resume %d\n", task_pid_nr(current)); 946 } 947 948 if (inode->i_state & I_DIRTY) { 949 dprintk("nfsd: write sync %d\n", task_pid_nr(current)); 950 err = vfs_fsync(file, 0); 951 } 952 last_ino = inode->i_ino; 953 last_dev = inode->i_sb->s_dev; 954 return err; 955 } 956 957 static __be32 958 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 959 loff_t offset, struct kvec *vec, int vlen, 960 unsigned long *cnt, int *stablep) 961 { 962 struct svc_export *exp; 963 struct dentry *dentry; 964 struct inode *inode; 965 mm_segment_t oldfs; 966 __be32 err = 0; 967 int host_err; 968 int stable = *stablep; 969 int use_wgather; 970 971 dentry = file->f_path.dentry; 972 inode = dentry->d_inode; 973 exp = fhp->fh_export; 974 975 /* 976 * Request sync writes if 977 * - the sync export option has been set, or 978 * - 
the client requested O_SYNC behavior (NFSv3 feature). 979 * - The file system doesn't support fsync(). 980 * When NFSv2 gathered writes have been configured for this volume, 981 * flushing the data to disk is handled separately below. 982 */ 983 use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); 984 985 if (!file->f_op->fsync) {/* COMMIT3 cannot work */ 986 stable = 2; 987 *stablep = 2; /* FILE_SYNC */ 988 } 989 990 if (!EX_ISSYNC(exp)) 991 stable = 0; 992 if (stable && !use_wgather) { 993 spin_lock(&file->f_lock); 994 file->f_flags |= O_SYNC; 995 spin_unlock(&file->f_lock); 996 } 997 998 /* Write the data. */ 999 oldfs = get_fs(); set_fs(KERNEL_DS); 1000 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); 1001 set_fs(oldfs); 1002 if (host_err < 0) 1003 goto out_nfserr; 1004 *cnt = host_err; 1005 nfsdstats.io_write += host_err; 1006 fsnotify_modify(file); 1007 1008 /* clear setuid/setgid flag after write */ 1009 if (inode->i_mode & (S_ISUID | S_ISGID)) 1010 kill_suid(dentry); 1011 1012 if (stable && use_wgather) 1013 host_err = wait_for_concurrent_writes(file); 1014 1015 out_nfserr: 1016 dprintk("nfsd: write complete host_err=%d\n", host_err); 1017 if (host_err >= 0) 1018 err = 0; 1019 else 1020 err = nfserrno(host_err); 1021 return err; 1022 } 1023 1024 /* 1025 * Read data from a file. count must contain the requested read count 1026 * on entry. On return, *count contains the number of bytes actually read. 1027 * N.B. 
After this call fhp needs an fh_put 1028 */ 1029 __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, 1030 loff_t offset, struct kvec *vec, int vlen, unsigned long *count) 1031 { 1032 struct file *file; 1033 struct inode *inode; 1034 struct raparms *ra; 1035 __be32 err; 1036 1037 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); 1038 if (err) 1039 return err; 1040 1041 inode = file->f_path.dentry->d_inode; 1042 1043 /* Get readahead parameters */ 1044 ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); 1045 1046 if (ra && ra->p_set) 1047 file->f_ra = ra->p_ra; 1048 1049 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); 1050 1051 /* Write back readahead params */ 1052 if (ra) { 1053 struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; 1054 spin_lock(&rab->pb_lock); 1055 ra->p_ra = file->f_ra; 1056 ra->p_set = 1; 1057 ra->p_count--; 1058 spin_unlock(&rab->pb_lock); 1059 } 1060 1061 nfsd_close(file); 1062 return err; 1063 } 1064 1065 /* As above, but use the provided file descriptor. */ 1066 __be32 1067 nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 1068 loff_t offset, struct kvec *vec, int vlen, 1069 unsigned long *count) 1070 { 1071 __be32 err; 1072 1073 if (file) { 1074 err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, 1075 NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); 1076 if (err) 1077 goto out; 1078 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); 1079 } else /* Note file may still be NULL in NFSv4 special stateid case: */ 1080 err = nfsd_read(rqstp, fhp, offset, vec, vlen, count); 1081 out: 1082 return err; 1083 } 1084 1085 /* 1086 * Write data to a file. 1087 * The stable flag requests synchronous writes. 1088 * N.B. 
After this call fhp needs an fh_put 1089 */ 1090 __be32 1091 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 1092 loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, 1093 int *stablep) 1094 { 1095 __be32 err = 0; 1096 1097 if (file) { 1098 err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, 1099 NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE); 1100 if (err) 1101 goto out; 1102 err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, 1103 stablep); 1104 } else { 1105 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); 1106 if (err) 1107 goto out; 1108 1109 if (cnt) 1110 err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, 1111 cnt, stablep); 1112 nfsd_close(file); 1113 } 1114 out: 1115 return err; 1116 } 1117 1118 #ifdef CONFIG_NFSD_V3 1119 /* 1120 * Commit all pending writes to stable storage. 1121 * 1122 * Note: we only guarantee that data that lies within the range specified 1123 * by the 'offset' and 'count' parameters will be synced. 1124 * 1125 * Unfortunately we cannot lock the file to make sure we return full WCC 1126 * data to the client, as locking happens lower down in the filesystem. 
1127 */ 1128 __be32 1129 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, 1130 loff_t offset, unsigned long count) 1131 { 1132 struct file *file; 1133 loff_t end = LLONG_MAX; 1134 __be32 err = nfserr_inval; 1135 1136 if (offset < 0) 1137 goto out; 1138 if (count != 0) { 1139 end = offset + (loff_t)count - 1; 1140 if (end < offset) 1141 goto out; 1142 } 1143 1144 err = nfsd_open(rqstp, fhp, S_IFREG, 1145 NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file); 1146 if (err) 1147 goto out; 1148 if (EX_ISSYNC(fhp->fh_export)) { 1149 int err2 = vfs_fsync_range(file, offset, end, 0); 1150 1151 if (err2 != -EINVAL) 1152 err = nfserrno(err2); 1153 else 1154 err = nfserr_notsupp; 1155 } 1156 1157 nfsd_close(file); 1158 out: 1159 return err; 1160 } 1161 #endif /* CONFIG_NFSD_V3 */ 1162 1163 static __be32 1164 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, 1165 struct iattr *iap) 1166 { 1167 /* 1168 * Mode has already been set earlier in create: 1169 */ 1170 iap->ia_valid &= ~ATTR_MODE; 1171 /* 1172 * Setting uid/gid works only for root. Irix appears to 1173 * send along the gid on create when it tries to implement 1174 * setgid directories via NFS: 1175 */ 1176 if (current_fsuid() != 0) 1177 iap->ia_valid &= ~(ATTR_UID|ATTR_GID); 1178 if (iap->ia_valid) 1179 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); 1180 return 0; 1181 } 1182 1183 /* HPUX client sometimes creates a file in mode 000, and sets size to 0. 1184 * setting size to 0 may fail for some specific file systems by the permission 1185 * checking which requires WRITE permission but the mode is 000. 1186 * we ignore the resizing(to 0) on the just new created file, since the size is 1187 * 0 after file created. 1188 * 1189 * call this only after vfs_create() is called. 
1190 * */ 1191 static void 1192 nfsd_check_ignore_resizing(struct iattr *iap) 1193 { 1194 if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) 1195 iap->ia_valid &= ~ATTR_SIZE; 1196 } 1197 1198 /* 1199 * Create a file (regular, directory, device, fifo); UNIX sockets 1200 * not yet implemented. 1201 * If the response fh has been verified, the parent directory should 1202 * already be locked. Note that the parent directory is left locked. 1203 * 1204 * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp 1205 */ 1206 __be32 1207 nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, 1208 char *fname, int flen, struct iattr *iap, 1209 int type, dev_t rdev, struct svc_fh *resfhp) 1210 { 1211 struct dentry *dentry, *dchild = NULL; 1212 struct inode *dirp; 1213 __be32 err; 1214 __be32 err2; 1215 int host_err; 1216 1217 err = nfserr_perm; 1218 if (!flen) 1219 goto out; 1220 err = nfserr_exist; 1221 if (isdotent(fname, flen)) 1222 goto out; 1223 1224 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); 1225 if (err) 1226 goto out; 1227 1228 dentry = fhp->fh_dentry; 1229 dirp = dentry->d_inode; 1230 1231 err = nfserr_notdir; 1232 if (!dirp->i_op->lookup) 1233 goto out; 1234 /* 1235 * Check whether the response file handle has been verified yet. 1236 * If it has, the parent directory should already be locked. 
1237 */ 1238 if (!resfhp->fh_dentry) { 1239 /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ 1240 fh_lock_nested(fhp, I_MUTEX_PARENT); 1241 dchild = lookup_one_len(fname, dentry, flen); 1242 host_err = PTR_ERR(dchild); 1243 if (IS_ERR(dchild)) 1244 goto out_nfserr; 1245 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); 1246 if (err) 1247 goto out; 1248 } else { 1249 /* called from nfsd_proc_create */ 1250 dchild = dget(resfhp->fh_dentry); 1251 if (!fhp->fh_locked) { 1252 /* not actually possible */ 1253 printk(KERN_ERR 1254 "nfsd_create: parent %s/%s not locked!\n", 1255 dentry->d_parent->d_name.name, 1256 dentry->d_name.name); 1257 err = nfserr_io; 1258 goto out; 1259 } 1260 } 1261 /* 1262 * Make sure the child dentry is still negative ... 1263 */ 1264 err = nfserr_exist; 1265 if (dchild->d_inode) { 1266 dprintk("nfsd_create: dentry %s/%s not negative!\n", 1267 dentry->d_name.name, dchild->d_name.name); 1268 goto out; 1269 } 1270 1271 if (!(iap->ia_valid & ATTR_MODE)) 1272 iap->ia_mode = 0; 1273 iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type; 1274 1275 err = nfserr_inval; 1276 if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) { 1277 printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n", 1278 type); 1279 goto out; 1280 } 1281 1282 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1283 if (host_err) 1284 goto out_nfserr; 1285 1286 /* 1287 * Get the dir op function pointer. 
1288 */ 1289 err = 0; 1290 switch (type) { 1291 case S_IFREG: 1292 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1293 if (!host_err) 1294 nfsd_check_ignore_resizing(iap); 1295 break; 1296 case S_IFDIR: 1297 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); 1298 break; 1299 case S_IFCHR: 1300 case S_IFBLK: 1301 case S_IFIFO: 1302 case S_IFSOCK: 1303 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); 1304 break; 1305 } 1306 if (host_err < 0) { 1307 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1308 goto out_nfserr; 1309 } 1310 1311 err = nfsd_create_setattr(rqstp, resfhp, iap); 1312 1313 /* 1314 * nfsd_setattr already committed the child. Transactional filesystems 1315 * had a chance to commit changes for both parent and child 1316 * simultaneously making the following commit_metadata a noop. 1317 */ 1318 err2 = nfserrno(commit_metadata(fhp)); 1319 if (err2) 1320 err = err2; 1321 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1322 /* 1323 * Update the file handle to get the new inode info. 
1324 */ 1325 if (!err) 1326 err = fh_update(resfhp); 1327 out: 1328 if (dchild && !IS_ERR(dchild)) 1329 dput(dchild); 1330 return err; 1331 1332 out_nfserr: 1333 err = nfserrno(host_err); 1334 goto out; 1335 } 1336 1337 #ifdef CONFIG_NFSD_V3 1338 1339 static inline int nfsd_create_is_exclusive(int createmode) 1340 { 1341 return createmode == NFS3_CREATE_EXCLUSIVE 1342 || createmode == NFS4_CREATE_EXCLUSIVE4_1; 1343 } 1344 1345 /* 1346 * NFSv3 and NFSv4 version of nfsd_create 1347 */ 1348 __be32 1349 do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, 1350 char *fname, int flen, struct iattr *iap, 1351 struct svc_fh *resfhp, int createmode, u32 *verifier, 1352 int *truncp, int *created) 1353 { 1354 struct dentry *dentry, *dchild = NULL; 1355 struct inode *dirp; 1356 __be32 err; 1357 int host_err; 1358 __u32 v_mtime=0, v_atime=0; 1359 1360 err = nfserr_perm; 1361 if (!flen) 1362 goto out; 1363 err = nfserr_exist; 1364 if (isdotent(fname, flen)) 1365 goto out; 1366 if (!(iap->ia_valid & ATTR_MODE)) 1367 iap->ia_mode = 0; 1368 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); 1369 if (err) 1370 goto out; 1371 1372 dentry = fhp->fh_dentry; 1373 dirp = dentry->d_inode; 1374 1375 /* Get all the sanity checks out of the way before 1376 * we lock the parent. */ 1377 err = nfserr_notdir; 1378 if (!dirp->i_op->lookup) 1379 goto out; 1380 fh_lock_nested(fhp, I_MUTEX_PARENT); 1381 1382 /* 1383 * Compose the response file handle. 
1384 */ 1385 dchild = lookup_one_len(fname, dentry, flen); 1386 host_err = PTR_ERR(dchild); 1387 if (IS_ERR(dchild)) 1388 goto out_nfserr; 1389 1390 /* If file doesn't exist, check for permissions to create one */ 1391 if (!dchild->d_inode) { 1392 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); 1393 if (err) 1394 goto out; 1395 } 1396 1397 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); 1398 if (err) 1399 goto out; 1400 1401 if (nfsd_create_is_exclusive(createmode)) { 1402 /* solaris7 gets confused (bugid 4218508) if these have 1403 * the high bit set, so just clear the high bits. If this is 1404 * ever changed to use different attrs for storing the 1405 * verifier, then do_open_lookup() will also need to be fixed 1406 * accordingly. 1407 */ 1408 v_mtime = verifier[0]&0x7fffffff; 1409 v_atime = verifier[1]&0x7fffffff; 1410 } 1411 1412 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1413 if (host_err) 1414 goto out_nfserr; 1415 if (dchild->d_inode) { 1416 err = 0; 1417 1418 switch (createmode) { 1419 case NFS3_CREATE_UNCHECKED: 1420 if (! S_ISREG(dchild->d_inode->i_mode)) 1421 err = nfserr_exist; 1422 else if (truncp) { 1423 /* in nfsv4, we need to treat this case a little 1424 * differently. we don't want to truncate the 1425 * file now; this would be wrong if the OPEN 1426 * fails for some other reason. furthermore, 1427 * if the size is nonzero, we should ignore it 1428 * according to spec! 
1429 */ 1430 *truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size; 1431 } 1432 else { 1433 iap->ia_valid &= ATTR_SIZE; 1434 goto set_attr; 1435 } 1436 break; 1437 case NFS3_CREATE_EXCLUSIVE: 1438 if ( dchild->d_inode->i_mtime.tv_sec == v_mtime 1439 && dchild->d_inode->i_atime.tv_sec == v_atime 1440 && dchild->d_inode->i_size == 0 ) 1441 break; 1442 case NFS4_CREATE_EXCLUSIVE4_1: 1443 if ( dchild->d_inode->i_mtime.tv_sec == v_mtime 1444 && dchild->d_inode->i_atime.tv_sec == v_atime 1445 && dchild->d_inode->i_size == 0 ) 1446 goto set_attr; 1447 /* fallthru */ 1448 case NFS3_CREATE_GUARDED: 1449 err = nfserr_exist; 1450 } 1451 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1452 goto out; 1453 } 1454 1455 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1456 if (host_err < 0) { 1457 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1458 goto out_nfserr; 1459 } 1460 if (created) 1461 *created = 1; 1462 1463 nfsd_check_ignore_resizing(iap); 1464 1465 if (nfsd_create_is_exclusive(createmode)) { 1466 /* Cram the verifier into atime/mtime */ 1467 iap->ia_valid = ATTR_MTIME|ATTR_ATIME 1468 | ATTR_MTIME_SET|ATTR_ATIME_SET; 1469 /* XXX someone who knows this better please fix it for nsec */ 1470 iap->ia_mtime.tv_sec = v_mtime; 1471 iap->ia_atime.tv_sec = v_atime; 1472 iap->ia_mtime.tv_nsec = 0; 1473 iap->ia_atime.tv_nsec = 0; 1474 } 1475 1476 set_attr: 1477 err = nfsd_create_setattr(rqstp, resfhp, iap); 1478 1479 /* 1480 * nfsd_setattr already committed the child (and possibly also the parent). 1481 */ 1482 if (!err) 1483 err = nfserrno(commit_metadata(fhp)); 1484 1485 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1486 /* 1487 * Update the filehandle to get the new inode info. 
1488 */ 1489 if (!err) 1490 err = fh_update(resfhp); 1491 1492 out: 1493 fh_unlock(fhp); 1494 if (dchild && !IS_ERR(dchild)) 1495 dput(dchild); 1496 return err; 1497 1498 out_nfserr: 1499 err = nfserrno(host_err); 1500 goto out; 1501 } 1502 #endif /* CONFIG_NFSD_V3 */ 1503 1504 /* 1505 * Read a symlink. On entry, *lenp must contain the maximum path length that 1506 * fits into the buffer. On return, it contains the true length. 1507 * N.B. After this call fhp needs an fh_put 1508 */ 1509 __be32 1510 nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) 1511 { 1512 struct dentry *dentry; 1513 struct inode *inode; 1514 mm_segment_t oldfs; 1515 __be32 err; 1516 int host_err; 1517 1518 err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP); 1519 if (err) 1520 goto out; 1521 1522 dentry = fhp->fh_dentry; 1523 inode = dentry->d_inode; 1524 1525 err = nfserr_inval; 1526 if (!inode->i_op->readlink) 1527 goto out; 1528 1529 touch_atime(fhp->fh_export->ex_path.mnt, dentry); 1530 /* N.B. Why does this call need a get_fs()?? 1531 * Remove the set_fs and watch the fireworks:-) --okir 1532 */ 1533 1534 oldfs = get_fs(); set_fs(KERNEL_DS); 1535 host_err = inode->i_op->readlink(dentry, buf, *lenp); 1536 set_fs(oldfs); 1537 1538 if (host_err < 0) 1539 goto out_nfserr; 1540 *lenp = host_err; 1541 err = 0; 1542 out: 1543 return err; 1544 1545 out_nfserr: 1546 err = nfserrno(host_err); 1547 goto out; 1548 } 1549 1550 /* 1551 * Create a symlink and look up its inode 1552 * N.B. 
After this call _both_ fhp and resfhp need an fh_put 1553 */ 1554 __be32 1555 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, 1556 char *fname, int flen, 1557 char *path, int plen, 1558 struct svc_fh *resfhp, 1559 struct iattr *iap) 1560 { 1561 struct dentry *dentry, *dnew; 1562 __be32 err, cerr; 1563 int host_err; 1564 1565 err = nfserr_noent; 1566 if (!flen || !plen) 1567 goto out; 1568 err = nfserr_exist; 1569 if (isdotent(fname, flen)) 1570 goto out; 1571 1572 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); 1573 if (err) 1574 goto out; 1575 fh_lock(fhp); 1576 dentry = fhp->fh_dentry; 1577 dnew = lookup_one_len(fname, dentry, flen); 1578 host_err = PTR_ERR(dnew); 1579 if (IS_ERR(dnew)) 1580 goto out_nfserr; 1581 1582 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1583 if (host_err) 1584 goto out_nfserr; 1585 1586 if (unlikely(path[plen] != 0)) { 1587 char *path_alloced = kmalloc(plen+1, GFP_KERNEL); 1588 if (path_alloced == NULL) 1589 host_err = -ENOMEM; 1590 else { 1591 strncpy(path_alloced, path, plen); 1592 path_alloced[plen] = 0; 1593 host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced); 1594 kfree(path_alloced); 1595 } 1596 } else 1597 host_err = vfs_symlink(dentry->d_inode, dnew, path); 1598 err = nfserrno(host_err); 1599 if (!err) 1600 err = nfserrno(commit_metadata(fhp)); 1601 fh_unlock(fhp); 1602 1603 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1604 1605 cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); 1606 dput(dnew); 1607 if (err==0) err = cerr; 1608 out: 1609 return err; 1610 1611 out_nfserr: 1612 err = nfserrno(host_err); 1613 goto out; 1614 } 1615 1616 /* 1617 * Create a hardlink 1618 * N.B. 
After this call _both_ ffhp and tfhp need an fh_put 1619 */ 1620 __be32 1621 nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, 1622 char *name, int len, struct svc_fh *tfhp) 1623 { 1624 struct dentry *ddir, *dnew, *dold; 1625 struct inode *dirp; 1626 __be32 err; 1627 int host_err; 1628 1629 err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE); 1630 if (err) 1631 goto out; 1632 err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP); 1633 if (err) 1634 goto out; 1635 1636 err = nfserr_perm; 1637 if (!len) 1638 goto out; 1639 err = nfserr_exist; 1640 if (isdotent(name, len)) 1641 goto out; 1642 1643 fh_lock_nested(ffhp, I_MUTEX_PARENT); 1644 ddir = ffhp->fh_dentry; 1645 dirp = ddir->d_inode; 1646 1647 dnew = lookup_one_len(name, ddir, len); 1648 host_err = PTR_ERR(dnew); 1649 if (IS_ERR(dnew)) 1650 goto out_nfserr; 1651 1652 dold = tfhp->fh_dentry; 1653 1654 host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt); 1655 if (host_err) { 1656 err = nfserrno(host_err); 1657 goto out_dput; 1658 } 1659 err = nfserr_noent; 1660 if (!dold->d_inode) 1661 goto out_drop_write; 1662 host_err = nfsd_break_lease(dold->d_inode); 1663 if (host_err) 1664 goto out_drop_write; 1665 host_err = vfs_link(dold, dirp, dnew); 1666 if (!host_err) { 1667 err = nfserrno(commit_metadata(ffhp)); 1668 if (!err) 1669 err = nfserrno(commit_metadata(tfhp)); 1670 } else { 1671 if (host_err == -EXDEV && rqstp->rq_vers == 2) 1672 err = nfserr_acces; 1673 else 1674 err = nfserrno(host_err); 1675 } 1676 out_drop_write: 1677 mnt_drop_write(tfhp->fh_export->ex_path.mnt); 1678 out_dput: 1679 dput(dnew); 1680 out_unlock: 1681 fh_unlock(ffhp); 1682 out: 1683 return err; 1684 1685 out_nfserr: 1686 err = nfserrno(host_err); 1687 goto out_unlock; 1688 } 1689 1690 /* 1691 * Rename a file 1692 * N.B. 
After this call _both_ ffhp and tfhp need an fh_put 1693 */ 1694 __be32 1695 nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, 1696 struct svc_fh *tfhp, char *tname, int tlen) 1697 { 1698 struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap; 1699 struct inode *fdir, *tdir; 1700 __be32 err; 1701 int host_err; 1702 1703 err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); 1704 if (err) 1705 goto out; 1706 err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE); 1707 if (err) 1708 goto out; 1709 1710 fdentry = ffhp->fh_dentry; 1711 fdir = fdentry->d_inode; 1712 1713 tdentry = tfhp->fh_dentry; 1714 tdir = tdentry->d_inode; 1715 1716 err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; 1717 if (ffhp->fh_export != tfhp->fh_export) 1718 goto out; 1719 1720 err = nfserr_perm; 1721 if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) 1722 goto out; 1723 1724 /* cannot use fh_lock as we need deadlock protective ordering 1725 * so do it by hand */ 1726 trap = lock_rename(tdentry, fdentry); 1727 ffhp->fh_locked = tfhp->fh_locked = 1; 1728 fill_pre_wcc(ffhp); 1729 fill_pre_wcc(tfhp); 1730 1731 odentry = lookup_one_len(fname, fdentry, flen); 1732 host_err = PTR_ERR(odentry); 1733 if (IS_ERR(odentry)) 1734 goto out_nfserr; 1735 1736 host_err = -ENOENT; 1737 if (!odentry->d_inode) 1738 goto out_dput_old; 1739 host_err = -EINVAL; 1740 if (odentry == trap) 1741 goto out_dput_old; 1742 1743 ndentry = lookup_one_len(tname, tdentry, tlen); 1744 host_err = PTR_ERR(ndentry); 1745 if (IS_ERR(ndentry)) 1746 goto out_dput_old; 1747 host_err = -ENOTEMPTY; 1748 if (ndentry == trap) 1749 goto out_dput_new; 1750 1751 host_err = -EXDEV; 1752 if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) 1753 goto out_dput_new; 1754 host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt); 1755 if (host_err) 1756 goto out_dput_new; 1757 1758 host_err = nfsd_break_lease(odentry->d_inode); 1759 if (host_err) 1760 goto out_drop_write; 
1761 if (ndentry->d_inode) { 1762 host_err = nfsd_break_lease(ndentry->d_inode); 1763 if (host_err) 1764 goto out_drop_write; 1765 } 1766 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1767 if (!host_err) { 1768 host_err = commit_metadata(tfhp); 1769 if (!host_err) 1770 host_err = commit_metadata(ffhp); 1771 } 1772 out_drop_write: 1773 mnt_drop_write(ffhp->fh_export->ex_path.mnt); 1774 out_dput_new: 1775 dput(ndentry); 1776 out_dput_old: 1777 dput(odentry); 1778 out_nfserr: 1779 err = nfserrno(host_err); 1780 1781 /* we cannot reply on fh_unlock on the two filehandles, 1782 * as that would do the wrong thing if the two directories 1783 * were the same, so again we do it by hand 1784 */ 1785 fill_post_wcc(ffhp); 1786 fill_post_wcc(tfhp); 1787 unlock_rename(tdentry, fdentry); 1788 ffhp->fh_locked = tfhp->fh_locked = 0; 1789 1790 out: 1791 return err; 1792 } 1793 1794 /* 1795 * Unlink a file or directory 1796 * N.B. After this call fhp needs an fh_put 1797 */ 1798 __be32 1799 nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, 1800 char *fname, int flen) 1801 { 1802 struct dentry *dentry, *rdentry; 1803 struct inode *dirp; 1804 __be32 err; 1805 int host_err; 1806 1807 err = nfserr_acces; 1808 if (!flen || isdotent(fname, flen)) 1809 goto out; 1810 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE); 1811 if (err) 1812 goto out; 1813 1814 fh_lock_nested(fhp, I_MUTEX_PARENT); 1815 dentry = fhp->fh_dentry; 1816 dirp = dentry->d_inode; 1817 1818 rdentry = lookup_one_len(fname, dentry, flen); 1819 host_err = PTR_ERR(rdentry); 1820 if (IS_ERR(rdentry)) 1821 goto out_nfserr; 1822 1823 if (!rdentry->d_inode) { 1824 dput(rdentry); 1825 err = nfserr_noent; 1826 goto out; 1827 } 1828 1829 if (!type) 1830 type = rdentry->d_inode->i_mode & S_IFMT; 1831 1832 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1833 if (host_err) 1834 goto out_put; 1835 1836 host_err = nfsd_break_lease(rdentry->d_inode); 1837 if (host_err) 1838 goto out_drop_write; 1839 if 
(type != S_IFDIR) 1840 host_err = vfs_unlink(dirp, rdentry); 1841 else 1842 host_err = vfs_rmdir(dirp, rdentry); 1843 if (!host_err) 1844 host_err = commit_metadata(fhp); 1845 out_drop_write: 1846 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1847 out_put: 1848 dput(rdentry); 1849 1850 out_nfserr: 1851 err = nfserrno(host_err); 1852 out: 1853 return err; 1854 } 1855 1856 /* 1857 * We do this buffering because we must not call back into the file 1858 * system's ->lookup() method from the filldir callback. That may well 1859 * deadlock a number of file systems. 1860 * 1861 * This is based heavily on the implementation of same in XFS. 1862 */ 1863 struct buffered_dirent { 1864 u64 ino; 1865 loff_t offset; 1866 int namlen; 1867 unsigned int d_type; 1868 char name[]; 1869 }; 1870 1871 struct readdir_data { 1872 char *dirent; 1873 size_t used; 1874 int full; 1875 }; 1876 1877 static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, 1878 loff_t offset, u64 ino, unsigned int d_type) 1879 { 1880 struct readdir_data *buf = __buf; 1881 struct buffered_dirent *de = (void *)(buf->dirent + buf->used); 1882 unsigned int reclen; 1883 1884 reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64)); 1885 if (buf->used + reclen > PAGE_SIZE) { 1886 buf->full = 1; 1887 return -EINVAL; 1888 } 1889 1890 de->namlen = namlen; 1891 de->offset = offset; 1892 de->ino = ino; 1893 de->d_type = d_type; 1894 memcpy(de->name, name, namlen); 1895 buf->used += reclen; 1896 1897 return 0; 1898 } 1899 1900 static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, 1901 struct readdir_cd *cdp, loff_t *offsetp) 1902 { 1903 struct readdir_data buf; 1904 struct buffered_dirent *de; 1905 int host_err; 1906 int size; 1907 loff_t offset; 1908 1909 buf.dirent = (void *)__get_free_page(GFP_KERNEL); 1910 if (!buf.dirent) 1911 return nfserrno(-ENOMEM); 1912 1913 offset = *offsetp; 1914 1915 while (1) { 1916 struct inode *dir_inode = file->f_path.dentry->d_inode; 1917 
unsigned int reclen; 1918 1919 cdp->err = nfserr_eof; /* will be cleared on successful read */ 1920 buf.used = 0; 1921 buf.full = 0; 1922 1923 host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf); 1924 if (buf.full) 1925 host_err = 0; 1926 1927 if (host_err < 0) 1928 break; 1929 1930 size = buf.used; 1931 1932 if (!size) 1933 break; 1934 1935 /* 1936 * Various filldir functions may end up calling back into 1937 * lookup_one_len() and the file system's ->lookup() method. 1938 * These expect i_mutex to be held, as it would within readdir. 1939 */ 1940 host_err = mutex_lock_killable(&dir_inode->i_mutex); 1941 if (host_err) 1942 break; 1943 1944 de = (struct buffered_dirent *)buf.dirent; 1945 while (size > 0) { 1946 offset = de->offset; 1947 1948 if (func(cdp, de->name, de->namlen, de->offset, 1949 de->ino, de->d_type)) 1950 break; 1951 1952 if (cdp->err != nfs_ok) 1953 break; 1954 1955 reclen = ALIGN(sizeof(*de) + de->namlen, 1956 sizeof(u64)); 1957 size -= reclen; 1958 de = (struct buffered_dirent *)((char *)de + reclen); 1959 } 1960 mutex_unlock(&dir_inode->i_mutex); 1961 if (size > 0) /* We bailed out early */ 1962 break; 1963 1964 offset = vfs_llseek(file, 0, SEEK_CUR); 1965 } 1966 1967 free_page((unsigned long)(buf.dirent)); 1968 1969 if (host_err) 1970 return nfserrno(host_err); 1971 1972 *offsetp = offset; 1973 return cdp->err; 1974 } 1975 1976 /* 1977 * Read entries from a directory. 1978 * The NFSv3/4 verifier we ignore for now. 
1979 */ 1980 __be32 1981 nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, 1982 struct readdir_cd *cdp, filldir_t func) 1983 { 1984 __be32 err; 1985 struct file *file; 1986 loff_t offset = *offsetp; 1987 1988 err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file); 1989 if (err) 1990 goto out; 1991 1992 offset = vfs_llseek(file, offset, 0); 1993 if (offset < 0) { 1994 err = nfserrno((int)offset); 1995 goto out_close; 1996 } 1997 1998 err = nfsd_buffered_readdir(file, func, cdp, offsetp); 1999 2000 if (err == nfserr_eof || err == nfserr_toosmall) 2001 err = nfs_ok; /* can still be found in ->err */ 2002 out_close: 2003 nfsd_close(file); 2004 out: 2005 return err; 2006 } 2007 2008 /* 2009 * Get file system stats 2010 * N.B. After this call fhp needs an fh_put 2011 */ 2012 __be32 2013 nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) 2014 { 2015 __be32 err; 2016 2017 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); 2018 if (!err) { 2019 struct path path = { 2020 .mnt = fhp->fh_export->ex_path.mnt, 2021 .dentry = fhp->fh_dentry, 2022 }; 2023 if (vfs_statfs(&path, stat)) 2024 err = nfserr_io; 2025 } 2026 return err; 2027 } 2028 2029 static int exp_rdonly(struct svc_rqst *rqstp, struct svc_export *exp) 2030 { 2031 return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY; 2032 } 2033 2034 /* 2035 * Check for a user's access permissions to this inode. 2036 */ 2037 __be32 2038 nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, 2039 struct dentry *dentry, int acc) 2040 { 2041 struct inode *inode = dentry->d_inode; 2042 int err; 2043 2044 if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP) 2045 return 0; 2046 #if 0 2047 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", 2048 acc, 2049 (acc & NFSD_MAY_READ)? " read" : "", 2050 (acc & NFSD_MAY_WRITE)? " write" : "", 2051 (acc & NFSD_MAY_EXEC)? " exec" : "", 2052 (acc & NFSD_MAY_SATTR)? " sattr" : "", 2053 (acc & NFSD_MAY_TRUNC)? 
" trunc" : "", 2054 (acc & NFSD_MAY_LOCK)? " lock" : "", 2055 (acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "", 2056 inode->i_mode, 2057 IS_IMMUTABLE(inode)? " immut" : "", 2058 IS_APPEND(inode)? " append" : "", 2059 __mnt_is_readonly(exp->ex_path.mnt)? " ro" : ""); 2060 dprintk(" owner %d/%d user %d/%d\n", 2061 inode->i_uid, inode->i_gid, current_fsuid(), current_fsgid()); 2062 #endif 2063 2064 /* Normally we reject any write/sattr etc access on a read-only file 2065 * system. But if it is IRIX doing check on write-access for a 2066 * device special file, we ignore rofs. 2067 */ 2068 if (!(acc & NFSD_MAY_LOCAL_ACCESS)) 2069 if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) { 2070 if (exp_rdonly(rqstp, exp) || 2071 __mnt_is_readonly(exp->ex_path.mnt)) 2072 return nfserr_rofs; 2073 if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode)) 2074 return nfserr_perm; 2075 } 2076 if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode)) 2077 return nfserr_perm; 2078 2079 if (acc & NFSD_MAY_LOCK) { 2080 /* If we cannot rely on authentication in NLM requests, 2081 * just allow locks, otherwise require read permission, or 2082 * ownership 2083 */ 2084 if (exp->ex_flags & NFSEXP_NOAUTHNLM) 2085 return 0; 2086 else 2087 acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE; 2088 } 2089 /* 2090 * The file owner always gets access permission for accesses that 2091 * would normally be checked at open time. This is to make 2092 * file access work even when the client has done a fchmod(fd, 0). 2093 * 2094 * However, `cp foo bar' should fail nevertheless when bar is 2095 * readonly. A sensible way to do this might be to reject all 2096 * attempts to truncate a read-only file, because a creat() call 2097 * always implies file truncation. 2098 * ... but this isn't really fair. A process may reasonably call 2099 * ftruncate on an open file descriptor on a file with perm 000. 2100 * We must trust the client to do permission checking - using "ACCESS" 2101 * with NFSv3. 
2102 */ 2103 if ((acc & NFSD_MAY_OWNER_OVERRIDE) && 2104 inode->i_uid == current_fsuid()) 2105 return 0; 2106 2107 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ 2108 err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC)); 2109 2110 /* Allow read access to binaries even when mode 111 */ 2111 if (err == -EACCES && S_ISREG(inode->i_mode) && 2112 acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) 2113 err = inode_permission(inode, MAY_EXEC); 2114 2115 return err? nfserrno(err) : 0; 2116 } 2117 2118 void 2119 nfsd_racache_shutdown(void) 2120 { 2121 struct raparms *raparm, *last_raparm; 2122 unsigned int i; 2123 2124 dprintk("nfsd: freeing readahead buffers.\n"); 2125 2126 for (i = 0; i < RAPARM_HASH_SIZE; i++) { 2127 raparm = raparm_hash[i].pb_head; 2128 while(raparm) { 2129 last_raparm = raparm; 2130 raparm = raparm->p_next; 2131 kfree(last_raparm); 2132 } 2133 raparm_hash[i].pb_head = NULL; 2134 } 2135 } 2136 /* 2137 * Initialize readahead param cache 2138 */ 2139 int 2140 nfsd_racache_init(int cache_size) 2141 { 2142 int i; 2143 int j = 0; 2144 int nperbucket; 2145 struct raparms **raparm = NULL; 2146 2147 2148 if (raparm_hash[0].pb_head) 2149 return 0; 2150 nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); 2151 if (nperbucket < 2) 2152 nperbucket = 2; 2153 cache_size = nperbucket * RAPARM_HASH_SIZE; 2154 2155 dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); 2156 2157 for (i = 0; i < RAPARM_HASH_SIZE; i++) { 2158 spin_lock_init(&raparm_hash[i].pb_lock); 2159 2160 raparm = &raparm_hash[i].pb_head; 2161 for (j = 0; j < nperbucket; j++) { 2162 *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL); 2163 if (!*raparm) 2164 goto out_nomem; 2165 raparm = &(*raparm)->p_next; 2166 } 2167 *raparm = NULL; 2168 } 2169 2170 nfsdstats.ra_size = cache_size; 2171 return 0; 2172 2173 out_nomem: 2174 dprintk("nfsd: kmalloc failed, freeing readahead buffers\n"); 2175 nfsd_racache_shutdown(); 2176 return -ENOMEM; 2177 } 2178 
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
/*
 * Fetch the POSIX ACL of the given type (ACL_TYPE_ACCESS or
 * ACL_TYPE_DEFAULT) from the handle's inode by reading the matching
 * extended attribute.  Returns the posix_acl_from_xattr() result, or an
 * ERR_PTR: -EOPNOTSUPP for inodes without POSIX ACL support or an unknown
 * type, otherwise the negative value returned by nfsd_getxattr().
 */
struct posix_acl *
nfsd_get_posix_acl(struct svc_fh *fhp, int type)
{
	struct inode *inode = fhp->fh_dentry->d_inode;
	struct posix_acl *acl;
	void *xattr = NULL;
	ssize_t len;
	char *name;

	if (!IS_POSIXACL(inode))
		return ERR_PTR(-EOPNOTSUPP);

	if (type == ACL_TYPE_ACCESS)
		name = POSIX_ACL_XATTR_ACCESS;
	else if (type == ACL_TYPE_DEFAULT)
		name = POSIX_ACL_XATTR_DEFAULT;
	else
		return ERR_PTR(-EOPNOTSUPP);

	len = nfsd_getxattr(fhp->fh_dentry, name, &xattr);
	if (len < 0)
		return ERR_PTR(len);

	acl = posix_acl_from_xattr(xattr, len);
	kfree(xattr);
	return acl;
}

/*
 * Store 'acl' as the POSIX ACL of the given type on the handle's inode.
 * A NULL or empty ACL removes the attribute instead; removing a default
 * ACL from a non-directory is a no-op, and a missing attribute (-ENODATA)
 * is not an error.  Returns 0 or a negative errno.
 */
int
nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
{
	struct inode *inode = fhp->fh_dentry->d_inode;
	void *value = NULL;
	size_t size = 0;
	char *name;
	int error;

	if (!IS_POSIXACL(inode) ||
	    !inode->i_op->setxattr || !inode->i_op->removexattr)
		return -EOPNOTSUPP;

	if (type == ACL_TYPE_ACCESS)
		name = POSIX_ACL_XATTR_ACCESS;
	else if (type == ACL_TYPE_DEFAULT)
		name = POSIX_ACL_XATTR_DEFAULT;
	else
		return -EOPNOTSUPP;

	if (acl && acl->a_count) {
		/* Serialise the ACL into its extended-attribute form. */
		size = posix_acl_xattr_size(acl->a_count);
		value = kmalloc(size, GFP_KERNEL);
		if (!value)
			return -ENOMEM;
		error = posix_acl_to_xattr(acl, value, size);
		if (error < 0)
			goto getout;
		size = error;
	}

	error = mnt_want_write(fhp->fh_export->ex_path.mnt);
	if (error)
		goto getout;

	if (size) {
		error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
	} else if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) {
		error = 0;
	} else {
		error = vfs_removexattr(fhp->fh_dentry, name);
		if (error == -ENODATA)
			error = 0;
	}
	mnt_drop_write(fhp->fh_export->ex_path.mnt);

getout:
	kfree(value);
	return error;
}
#endif  /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */