1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/sched.h> 11 #include <linux/slab.h> 12 #include <linux/spinlock.h> 13 #include <linux/completion.h> 14 #include <linux/buffer_head.h> 15 #include <linux/posix_acl.h> 16 #include <linux/sort.h> 17 #include <linux/gfs2_ondisk.h> 18 #include <linux/crc32.h> 19 #include <linux/lm_interface.h> 20 #include <linux/security.h> 21 22 #include "gfs2.h" 23 #include "incore.h" 24 #include "acl.h" 25 #include "bmap.h" 26 #include "dir.h" 27 #include "eattr.h" 28 #include "glock.h" 29 #include "glops.h" 30 #include "inode.h" 31 #include "log.h" 32 #include "meta_io.h" 33 #include "ops_address.h" 34 #include "ops_file.h" 35 #include "ops_inode.h" 36 #include "quota.h" 37 #include "rgrp.h" 38 #include "trans.h" 39 #include "util.h" 40 41 struct gfs2_inum_range_host { 42 u64 ir_start; 43 u64 ir_length; 44 }; 45 46 static int iget_test(struct inode *inode, void *opaque) 47 { 48 struct gfs2_inode *ip = GFS2_I(inode); 49 u64 *no_addr = opaque; 50 51 if (ip->i_no_addr == *no_addr && 52 inode->i_private != NULL) 53 return 1; 54 55 return 0; 56 } 57 58 static int iget_set(struct inode *inode, void *opaque) 59 { 60 struct gfs2_inode *ip = GFS2_I(inode); 61 u64 *no_addr = opaque; 62 63 inode->i_ino = (unsigned long)*no_addr; 64 ip->i_no_addr = *no_addr; 65 return 0; 66 } 67 68 struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) 69 { 70 unsigned long hash = (unsigned long)no_addr; 71 return ilookup5(sb, hash, iget_test, &no_addr); 72 } 73 74 static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) 75 { 76 unsigned long hash = (unsigned long)no_addr; 77 return iget5_locked(sb, hash, iget_test, iget_set, 
&no_addr); 78 } 79 80 /** 81 * gfs2_inode_lookup - Lookup an inode 82 * @sb: The super block 83 * @no_addr: The inode number 84 * @type: The type of the inode 85 * 86 * Returns: A VFS inode, or an error 87 */ 88 89 struct inode *gfs2_inode_lookup(struct super_block *sb, u64 no_addr, unsigned int type) 90 { 91 struct inode *inode = gfs2_iget(sb, no_addr); 92 struct gfs2_inode *ip = GFS2_I(inode); 93 struct gfs2_glock *io_gl; 94 int error; 95 96 if (!inode) 97 return ERR_PTR(-ENOBUFS); 98 99 if (inode->i_state & I_NEW) { 100 struct gfs2_sbd *sdp = GFS2_SB(inode); 101 umode_t mode; 102 inode->i_private = ip; 103 104 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 105 if (unlikely(error)) 106 goto fail; 107 ip->i_gl->gl_object = ip; 108 109 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 110 if (unlikely(error)) 111 goto fail_put; 112 113 set_bit(GIF_INVALID, &ip->i_flags); 114 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 115 if (unlikely(error)) 116 goto fail_iopen; 117 118 gfs2_glock_put(io_gl); 119 120 /* 121 * We must read the inode in order to work out its type in 122 * this case. Note that this doesn't happen often as we normally 123 * know the type beforehand. This code path only occurs during 124 * unlinked inode recovery (where it is safe to do this glock, 125 * which is not true in the general case). 
126 */ 127 inode->i_mode = mode = DT2IF(type); 128 if (type == DT_UNKNOWN) { 129 struct gfs2_holder gh; 130 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 131 if (unlikely(error)) 132 goto fail_glock; 133 /* Inode is now uptodate */ 134 mode = inode->i_mode; 135 gfs2_glock_dq_uninit(&gh); 136 } 137 138 if (S_ISREG(mode)) { 139 inode->i_op = &gfs2_file_iops; 140 inode->i_fop = &gfs2_file_fops; 141 inode->i_mapping->a_ops = &gfs2_file_aops; 142 } else if (S_ISDIR(mode)) { 143 inode->i_op = &gfs2_dir_iops; 144 inode->i_fop = &gfs2_dir_fops; 145 } else if (S_ISLNK(mode)) { 146 inode->i_op = &gfs2_symlink_iops; 147 } else { 148 inode->i_op = &gfs2_dev_iops; 149 } 150 151 unlock_new_inode(inode); 152 } 153 154 return inode; 155 fail_glock: 156 gfs2_glock_dq(&ip->i_iopen_gh); 157 fail_iopen: 158 gfs2_glock_put(io_gl); 159 fail_put: 160 ip->i_gl->gl_object = NULL; 161 gfs2_glock_put(ip->i_gl); 162 fail: 163 iput(inode); 164 return ERR_PTR(error); 165 } 166 167 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) 168 { 169 struct gfs2_dinode_host *di = &ip->i_di; 170 const struct gfs2_dinode *str = buf; 171 172 if (ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)) { 173 if (gfs2_consist_inode(ip)) 174 gfs2_dinode_print(ip); 175 return -EIO; 176 } 177 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); 178 ip->i_inode.i_mode = be32_to_cpu(str->di_mode); 179 ip->i_inode.i_rdev = 0; 180 switch (ip->i_inode.i_mode & S_IFMT) { 181 case S_IFBLK: 182 case S_IFCHR: 183 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), 184 be32_to_cpu(str->di_minor)); 185 break; 186 }; 187 188 ip->i_inode.i_uid = be32_to_cpu(str->di_uid); 189 ip->i_inode.i_gid = be32_to_cpu(str->di_gid); 190 /* 191 * We will need to review setting the nlink count here in the 192 * light of the forthcoming ro bind mount work. This is a reminder 193 * to do that. 
194 */ 195 ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); 196 di->di_size = be64_to_cpu(str->di_size); 197 i_size_write(&ip->i_inode, di->di_size); 198 di->di_blocks = be64_to_cpu(str->di_blocks); 199 gfs2_set_inode_blocks(&ip->i_inode); 200 ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); 201 ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); 202 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); 203 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); 204 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); 205 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); 206 207 di->di_goal_meta = be64_to_cpu(str->di_goal_meta); 208 di->di_goal_data = be64_to_cpu(str->di_goal_data); 209 di->di_generation = be64_to_cpu(str->di_generation); 210 211 di->di_flags = be32_to_cpu(str->di_flags); 212 gfs2_set_inode_flags(&ip->i_inode); 213 di->di_height = be16_to_cpu(str->di_height); 214 215 di->di_depth = be16_to_cpu(str->di_depth); 216 di->di_entries = be32_to_cpu(str->di_entries); 217 218 di->di_eattr = be64_to_cpu(str->di_eattr); 219 return 0; 220 } 221 222 /** 223 * gfs2_inode_refresh - Refresh the incore copy of the dinode 224 * @ip: The GFS2 inode 225 * 226 * Returns: errno 227 */ 228 229 int gfs2_inode_refresh(struct gfs2_inode *ip) 230 { 231 struct buffer_head *dibh; 232 int error; 233 234 error = gfs2_meta_inode_buffer(ip, &dibh); 235 if (error) 236 return error; 237 238 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) { 239 brelse(dibh); 240 return -EIO; 241 } 242 243 error = gfs2_dinode_in(ip, dibh->b_data); 244 brelse(dibh); 245 clear_bit(GIF_INVALID, &ip->i_flags); 246 247 return error; 248 } 249 250 int gfs2_dinode_dealloc(struct gfs2_inode *ip) 251 { 252 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 253 struct gfs2_alloc *al; 254 struct gfs2_rgrpd *rgd; 255 int error; 256 257 if (ip->i_di.di_blocks != 1) { 258 if (gfs2_consist_inode(ip)) 259 gfs2_dinode_print(ip); 260 return -EIO; 261 } 
262 263 al = gfs2_alloc_get(ip); 264 265 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 266 if (error) 267 goto out; 268 269 error = gfs2_rindex_hold(sdp, &al->al_ri_gh); 270 if (error) 271 goto out_qs; 272 273 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 274 if (!rgd) { 275 gfs2_consist_inode(ip); 276 error = -EIO; 277 goto out_rindex_relse; 278 } 279 280 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, 281 &al->al_rgd_gh); 282 if (error) 283 goto out_rindex_relse; 284 285 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1); 286 if (error) 287 goto out_rg_gunlock; 288 289 gfs2_trans_add_gl(ip->i_gl); 290 291 gfs2_free_di(rgd, ip); 292 293 gfs2_trans_end(sdp); 294 clear_bit(GLF_STICKY, &ip->i_gl->gl_flags); 295 296 out_rg_gunlock: 297 gfs2_glock_dq_uninit(&al->al_rgd_gh); 298 out_rindex_relse: 299 gfs2_glock_dq_uninit(&al->al_ri_gh); 300 out_qs: 301 gfs2_quota_unhold(ip); 302 out: 303 gfs2_alloc_put(ip); 304 return error; 305 } 306 307 /** 308 * gfs2_change_nlink - Change nlink count on inode 309 * @ip: The GFS2 inode 310 * @diff: The change in the nlink count required 311 * 312 * Returns: errno 313 */ 314 int gfs2_change_nlink(struct gfs2_inode *ip, int diff) 315 { 316 struct buffer_head *dibh; 317 u32 nlink; 318 int error; 319 320 BUG_ON(diff != 1 && diff != -1); 321 nlink = ip->i_inode.i_nlink + diff; 322 323 /* If we are reducing the nlink count, but the new value ends up being 324 bigger than the old one, we must have underflowed. 
*/ 325 if (diff < 0 && nlink > ip->i_inode.i_nlink) { 326 if (gfs2_consist_inode(ip)) 327 gfs2_dinode_print(ip); 328 return -EIO; 329 } 330 331 error = gfs2_meta_inode_buffer(ip, &dibh); 332 if (error) 333 return error; 334 335 if (diff > 0) 336 inc_nlink(&ip->i_inode); 337 else 338 drop_nlink(&ip->i_inode); 339 340 ip->i_inode.i_ctime = CURRENT_TIME; 341 342 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 343 gfs2_dinode_out(ip, dibh->b_data); 344 brelse(dibh); 345 mark_inode_dirty(&ip->i_inode); 346 347 if (ip->i_inode.i_nlink == 0) 348 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ 349 350 return error; 351 } 352 353 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) 354 { 355 struct qstr qstr; 356 struct inode *inode; 357 gfs2_str2qstr(&qstr, name); 358 inode = gfs2_lookupi(dip, &qstr, 1, NULL); 359 /* gfs2_lookupi has inconsistent callers: vfs 360 * related routines expect NULL for no entry found, 361 * gfs2_lookup_simple callers expect ENOENT 362 * and do not check for NULL. 363 */ 364 if (inode == NULL) 365 return ERR_PTR(-ENOENT); 366 else 367 return inode; 368 } 369 370 371 /** 372 * gfs2_lookupi - Look up a filename in a directory and return its inode 373 * @d_gh: An initialized holder for the directory glock 374 * @name: The name of the inode to look for 375 * @is_root: If 1, ignore the caller's permissions 376 * @i_gh: An uninitialized holder for the new inode glock 377 * 378 * This can be called via the VFS filldir function when NFS is doing 379 * a readdirplus and the inode which its intending to stat isn't 380 * already in cache. In this case we must not take the directory glock 381 * again, since the readdir call will have already taken that lock. 
382 * 383 * Returns: errno 384 */ 385 386 struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 387 int is_root, struct nameidata *nd) 388 { 389 struct super_block *sb = dir->i_sb; 390 struct gfs2_inode *dip = GFS2_I(dir); 391 struct gfs2_holder d_gh; 392 int error = 0; 393 struct inode *inode = NULL; 394 int unlock = 0; 395 396 if (!name->len || name->len > GFS2_FNAMESIZE) 397 return ERR_PTR(-ENAMETOOLONG); 398 399 if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) || 400 (name->len == 2 && memcmp(name->name, "..", 2) == 0 && 401 dir == sb->s_root->d_inode)) { 402 igrab(dir); 403 return dir; 404 } 405 406 if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) { 407 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 408 if (error) 409 return ERR_PTR(error); 410 unlock = 1; 411 } 412 413 if (!is_root) { 414 error = permission(dir, MAY_EXEC, NULL); 415 if (error) 416 goto out; 417 } 418 419 inode = gfs2_dir_search(dir, name); 420 if (IS_ERR(inode)) 421 error = PTR_ERR(inode); 422 out: 423 if (unlock) 424 gfs2_glock_dq_uninit(&d_gh); 425 if (error == -ENOENT) 426 return NULL; 427 return inode ? 
inode : ERR_PTR(error); 428 } 429 430 static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf) 431 { 432 const struct gfs2_inum_range *str = buf; 433 434 ir->ir_start = be64_to_cpu(str->ir_start); 435 ir->ir_length = be64_to_cpu(str->ir_length); 436 } 437 438 static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf) 439 { 440 struct gfs2_inum_range *str = buf; 441 442 str->ir_start = cpu_to_be64(ir->ir_start); 443 str->ir_length = cpu_to_be64(ir->ir_length); 444 } 445 446 static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) 447 { 448 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); 449 struct buffer_head *bh; 450 struct gfs2_inum_range_host ir; 451 int error; 452 453 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 454 if (error) 455 return error; 456 mutex_lock(&sdp->sd_inum_mutex); 457 458 error = gfs2_meta_inode_buffer(ip, &bh); 459 if (error) { 460 mutex_unlock(&sdp->sd_inum_mutex); 461 gfs2_trans_end(sdp); 462 return error; 463 } 464 465 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 466 467 if (ir.ir_length) { 468 *formal_ino = ir.ir_start++; 469 ir.ir_length--; 470 gfs2_trans_add_bh(ip->i_gl, bh, 1); 471 gfs2_inum_range_out(&ir, 472 bh->b_data + sizeof(struct gfs2_dinode)); 473 brelse(bh); 474 mutex_unlock(&sdp->sd_inum_mutex); 475 gfs2_trans_end(sdp); 476 return 0; 477 } 478 479 brelse(bh); 480 481 mutex_unlock(&sdp->sd_inum_mutex); 482 gfs2_trans_end(sdp); 483 484 return 1; 485 } 486 487 static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino) 488 { 489 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); 490 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode); 491 struct gfs2_holder gh; 492 struct buffer_head *bh; 493 struct gfs2_inum_range_host ir; 494 int error; 495 496 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 497 if (error) 498 return error; 499 500 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0); 501 if (error) 502 goto out; 503 
mutex_lock(&sdp->sd_inum_mutex); 504 505 error = gfs2_meta_inode_buffer(ip, &bh); 506 if (error) 507 goto out_end_trans; 508 509 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 510 511 if (!ir.ir_length) { 512 struct buffer_head *m_bh; 513 u64 x, y; 514 __be64 z; 515 516 error = gfs2_meta_inode_buffer(m_ip, &m_bh); 517 if (error) 518 goto out_brelse; 519 520 z = *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)); 521 x = y = be64_to_cpu(z); 522 ir.ir_start = x; 523 ir.ir_length = GFS2_INUM_QUANTUM; 524 x += GFS2_INUM_QUANTUM; 525 if (x < y) 526 gfs2_consist_inode(m_ip); 527 z = cpu_to_be64(x); 528 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); 529 *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = z; 530 531 brelse(m_bh); 532 } 533 534 *formal_ino = ir.ir_start++; 535 ir.ir_length--; 536 537 gfs2_trans_add_bh(ip->i_gl, bh, 1); 538 gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 539 540 out_brelse: 541 brelse(bh); 542 out_end_trans: 543 mutex_unlock(&sdp->sd_inum_mutex); 544 gfs2_trans_end(sdp); 545 out: 546 gfs2_glock_dq_uninit(&gh); 547 return error; 548 } 549 550 static int pick_formal_ino(struct gfs2_sbd *sdp, u64 *inum) 551 { 552 int error; 553 554 error = pick_formal_ino_1(sdp, inum); 555 if (error <= 0) 556 return error; 557 558 error = pick_formal_ino_2(sdp, inum); 559 560 return error; 561 } 562 563 /** 564 * create_ok - OK to create a new on-disk inode here? 
565 * @dip: Directory in which dinode is to be created 566 * @name: Name of new dinode 567 * @mode: 568 * 569 * Returns: errno 570 */ 571 572 static int create_ok(struct gfs2_inode *dip, const struct qstr *name, 573 unsigned int mode) 574 { 575 int error; 576 577 error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL); 578 if (error) 579 return error; 580 581 /* Don't create entries in an unlinked directory */ 582 if (!dip->i_inode.i_nlink) 583 return -EPERM; 584 585 error = gfs2_dir_check(&dip->i_inode, name, NULL); 586 switch (error) { 587 case -ENOENT: 588 error = 0; 589 break; 590 case 0: 591 return -EEXIST; 592 default: 593 return error; 594 } 595 596 if (dip->i_di.di_entries == (u32)-1) 597 return -EFBIG; 598 if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) 599 return -EMLINK; 600 601 return 0; 602 } 603 604 static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, 605 unsigned int *uid, unsigned int *gid) 606 { 607 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 608 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { 609 if (S_ISDIR(*mode)) 610 *mode |= S_ISUID; 611 else if (dip->i_inode.i_uid != current->fsuid) 612 *mode &= ~07111; 613 *uid = dip->i_inode.i_uid; 614 } else 615 *uid = current->fsuid; 616 617 if (dip->i_inode.i_mode & S_ISGID) { 618 if (S_ISDIR(*mode)) 619 *mode |= S_ISGID; 620 *gid = dip->i_inode.i_gid; 621 } else 622 *gid = current->fsgid; 623 } 624 625 static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) 626 { 627 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 628 int error; 629 630 gfs2_alloc_get(dip); 631 632 dip->i_alloc.al_requested = RES_DINODE; 633 error = gfs2_inplace_reserve(dip); 634 if (error) 635 goto out; 636 637 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0); 638 if (error) 639 goto out_ipreserv; 640 641 *no_addr = gfs2_alloc_di(dip, generation); 642 643 gfs2_trans_end(sdp); 644 645 out_ipreserv: 646 gfs2_inplace_release(dip); 647 out: 648 
gfs2_alloc_put(dip); 649 return error; 650 } 651 652 /** 653 * init_dinode - Fill in a new dinode structure 654 * @dip: the directory this inode is being created in 655 * @gl: The glock covering the new inode 656 * @inum: the inode number 657 * @mode: the file permissions 658 * @uid: 659 * @gid: 660 * 661 */ 662 663 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 664 const struct gfs2_inum_host *inum, unsigned int mode, 665 unsigned int uid, unsigned int gid, 666 const u64 *generation, dev_t dev) 667 { 668 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 669 struct gfs2_dinode *di; 670 struct buffer_head *dibh; 671 struct timespec tv = CURRENT_TIME; 672 673 dibh = gfs2_meta_new(gl, inum->no_addr); 674 gfs2_trans_add_bh(gl, dibh, 1); 675 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 676 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 677 di = (struct gfs2_dinode *)dibh->b_data; 678 679 di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino); 680 di->di_num.no_addr = cpu_to_be64(inum->no_addr); 681 di->di_mode = cpu_to_be32(mode); 682 di->di_uid = cpu_to_be32(uid); 683 di->di_gid = cpu_to_be32(gid); 684 di->di_nlink = 0; 685 di->di_size = 0; 686 di->di_blocks = cpu_to_be64(1); 687 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 688 di->di_major = cpu_to_be32(MAJOR(dev)); 689 di->di_minor = cpu_to_be32(MINOR(dev)); 690 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 691 di->di_generation = cpu_to_be64(*generation); 692 di->di_flags = 0; 693 694 if (S_ISREG(mode)) { 695 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) || 696 gfs2_tune_get(sdp, gt_new_files_jdata)) 697 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); 698 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) || 699 gfs2_tune_get(sdp, gt_new_files_directio)) 700 di->di_flags |= cpu_to_be32(GFS2_DIF_DIRECTIO); 701 } else if (S_ISDIR(mode)) { 702 di->di_flags |= cpu_to_be32(dip->i_di.di_flags & 703 GFS2_DIF_INHERIT_DIRECTIO); 
704 di->di_flags |= cpu_to_be32(dip->i_di.di_flags & 705 GFS2_DIF_INHERIT_JDATA); 706 } 707 708 di->__pad1 = 0; 709 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); 710 di->di_height = 0; 711 di->__pad2 = 0; 712 di->__pad3 = 0; 713 di->di_depth = 0; 714 di->di_entries = 0; 715 memset(&di->__pad4, 0, sizeof(di->__pad4)); 716 di->di_eattr = 0; 717 di->di_atime_nsec = cpu_to_be32(tv.tv_nsec); 718 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 719 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 720 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 721 722 brelse(dibh); 723 } 724 725 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 726 unsigned int mode, const struct gfs2_inum_host *inum, 727 const u64 *generation, dev_t dev) 728 { 729 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 730 unsigned int uid, gid; 731 int error; 732 733 munge_mode_uid_gid(dip, &mode, &uid, &gid); 734 gfs2_alloc_get(dip); 735 736 error = gfs2_quota_lock(dip, uid, gid); 737 if (error) 738 goto out; 739 740 error = gfs2_quota_check(dip, uid, gid); 741 if (error) 742 goto out_quota; 743 744 error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0); 745 if (error) 746 goto out_quota; 747 748 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev); 749 gfs2_quota_change(dip, +1, uid, gid); 750 gfs2_trans_end(sdp); 751 752 out_quota: 753 gfs2_quota_unlock(dip); 754 out: 755 gfs2_alloc_put(dip); 756 return error; 757 } 758 759 static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, 760 struct gfs2_inode *ip) 761 { 762 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 763 struct gfs2_alloc *al; 764 int alloc_required; 765 struct buffer_head *dibh; 766 int error; 767 768 al = gfs2_alloc_get(dip); 769 770 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 771 if (error) 772 goto fail; 773 774 error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); 775 if (alloc_required < 0) 776 goto fail; 777 if (alloc_required) 
{ 778 error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); 779 if (error) 780 goto fail_quota_locks; 781 782 al->al_requested = sdp->sd_max_dirres; 783 784 error = gfs2_inplace_reserve(dip); 785 if (error) 786 goto fail_quota_locks; 787 788 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 789 al->al_rgd->rd_length + 790 2 * RES_DINODE + 791 RES_STATFS + RES_QUOTA, 0); 792 if (error) 793 goto fail_ipreserv; 794 } else { 795 error = gfs2_trans_begin(sdp, RES_LEAF + 2 * RES_DINODE, 0); 796 if (error) 797 goto fail_quota_locks; 798 } 799 800 error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode)); 801 if (error) 802 goto fail_end_trans; 803 804 error = gfs2_meta_inode_buffer(ip, &dibh); 805 if (error) 806 goto fail_end_trans; 807 ip->i_inode.i_nlink = 1; 808 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 809 gfs2_dinode_out(ip, dibh->b_data); 810 brelse(dibh); 811 return 0; 812 813 fail_end_trans: 814 gfs2_trans_end(sdp); 815 816 fail_ipreserv: 817 if (dip->i_alloc.al_rgd) 818 gfs2_inplace_release(dip); 819 820 fail_quota_locks: 821 gfs2_quota_unlock(dip); 822 823 fail: 824 gfs2_alloc_put(dip); 825 return error; 826 } 827 828 static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) 829 { 830 int err; 831 size_t len; 832 void *value; 833 char *name; 834 struct gfs2_ea_request er; 835 836 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, 837 &name, &value, &len); 838 839 if (err) { 840 if (err == -EOPNOTSUPP) 841 return 0; 842 return err; 843 } 844 845 memset(&er, 0, sizeof(struct gfs2_ea_request)); 846 847 er.er_type = GFS2_EATYPE_SECURITY; 848 er.er_name = name; 849 er.er_data = value; 850 er.er_name_len = strlen(name); 851 er.er_data_len = len; 852 853 err = gfs2_ea_set_i(ip, &er); 854 855 kfree(value); 856 kfree(name); 857 858 return err; 859 } 860 861 /** 862 * gfs2_createi - Create a new inode 863 * @ghs: An array of two holders 864 * @name: The name of the new file 865 * @mode: the permissions on the 
new inode 866 * 867 * @ghs[0] is an initialized holder for the directory 868 * @ghs[1] is the holder for the inode lock 869 * 870 * If the return value is not NULL, the glocks on both the directory and the new 871 * file are held. A transaction has been started and an inplace reservation 872 * is held, as well. 873 * 874 * Returns: An inode 875 */ 876 877 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 878 unsigned int mode, dev_t dev) 879 { 880 struct inode *inode = NULL; 881 struct gfs2_inode *dip = ghs->gh_gl->gl_object; 882 struct inode *dir = &dip->i_inode; 883 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 884 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 885 int error; 886 u64 generation; 887 888 if (!name->len || name->len > GFS2_FNAMESIZE) 889 return ERR_PTR(-ENAMETOOLONG); 890 891 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); 892 error = gfs2_glock_nq(ghs); 893 if (error) 894 goto fail; 895 896 error = create_ok(dip, name, mode); 897 if (error) 898 goto fail_gunlock; 899 900 error = pick_formal_ino(sdp, &inum.no_formal_ino); 901 if (error) 902 goto fail_gunlock; 903 904 error = alloc_dinode(dip, &inum.no_addr, &generation); 905 if (error) 906 goto fail_gunlock; 907 908 error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, 909 LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 910 if (error) 911 goto fail_gunlock; 912 913 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev); 914 if (error) 915 goto fail_gunlock2; 916 917 inode = gfs2_inode_lookup(dir->i_sb, inum.no_addr, IF2DT(mode)); 918 if (IS_ERR(inode)) 919 goto fail_gunlock2; 920 921 error = gfs2_inode_refresh(GFS2_I(inode)); 922 if (error) 923 goto fail_gunlock2; 924 925 error = gfs2_acl_create(dip, GFS2_I(inode)); 926 if (error) 927 goto fail_gunlock2; 928 929 error = gfs2_security_init(dip, GFS2_I(inode)); 930 if (error) 931 goto fail_gunlock2; 932 933 error = link_dinode(dip, name, GFS2_I(inode)); 934 if (error) 935 goto fail_gunlock2; 
936 937 if (!inode) 938 return ERR_PTR(-ENOMEM); 939 return inode; 940 941 fail_gunlock2: 942 gfs2_glock_dq_uninit(ghs + 1); 943 if (inode) 944 iput(inode); 945 fail_gunlock: 946 gfs2_glock_dq(ghs); 947 fail: 948 return ERR_PTR(error); 949 } 950 951 /** 952 * gfs2_rmdiri - Remove a directory 953 * @dip: The parent directory of the directory to be removed 954 * @name: The name of the directory to be removed 955 * @ip: The GFS2 inode of the directory to be removed 956 * 957 * Assumes Glocks on dip and ip are held 958 * 959 * Returns: errno 960 */ 961 962 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, 963 struct gfs2_inode *ip) 964 { 965 struct qstr dotname; 966 int error; 967 968 if (ip->i_di.di_entries != 2) { 969 if (gfs2_consist_inode(ip)) 970 gfs2_dinode_print(ip); 971 return -EIO; 972 } 973 974 error = gfs2_dir_del(dip, name); 975 if (error) 976 return error; 977 978 error = gfs2_change_nlink(dip, -1); 979 if (error) 980 return error; 981 982 gfs2_str2qstr(&dotname, "."); 983 error = gfs2_dir_del(ip, &dotname); 984 if (error) 985 return error; 986 987 gfs2_str2qstr(&dotname, ".."); 988 error = gfs2_dir_del(ip, &dotname); 989 if (error) 990 return error; 991 992 /* It looks odd, but it really should be done twice */ 993 error = gfs2_change_nlink(ip, -1); 994 if (error) 995 return error; 996 997 error = gfs2_change_nlink(ip, -1); 998 if (error) 999 return error; 1000 1001 return error; 1002 } 1003 1004 /* 1005 * gfs2_unlink_ok - check to see that a inode is still in a directory 1006 * @dip: the directory 1007 * @name: the name of the file 1008 * @ip: the inode 1009 * 1010 * Assumes that the lock on (at least) @dip is held. 
1011 * 1012 * Returns: 0 if the parent/child relationship is correct, errno if it isn't 1013 */ 1014 1015 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, 1016 const struct gfs2_inode *ip) 1017 { 1018 int error; 1019 1020 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) 1021 return -EPERM; 1022 1023 if ((dip->i_inode.i_mode & S_ISVTX) && 1024 dip->i_inode.i_uid != current->fsuid && 1025 ip->i_inode.i_uid != current->fsuid && !capable(CAP_FOWNER)) 1026 return -EPERM; 1027 1028 if (IS_APPEND(&dip->i_inode)) 1029 return -EPERM; 1030 1031 error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL); 1032 if (error) 1033 return error; 1034 1035 error = gfs2_dir_check(&dip->i_inode, name, ip); 1036 if (error) 1037 return error; 1038 1039 return 0; 1040 } 1041 1042 /* 1043 * gfs2_ok_to_move - check if it's ok to move a directory to another directory 1044 * @this: move this 1045 * @to: to here 1046 * 1047 * Follow @to back to the root and make sure we don't encounter @this 1048 * Assumes we already hold the rename lock. 
1049 * 1050 * Returns: errno 1051 */ 1052 1053 int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) 1054 { 1055 struct inode *dir = &to->i_inode; 1056 struct super_block *sb = dir->i_sb; 1057 struct inode *tmp; 1058 struct qstr dotdot; 1059 int error = 0; 1060 1061 gfs2_str2qstr(&dotdot, ".."); 1062 1063 igrab(dir); 1064 1065 for (;;) { 1066 if (dir == &this->i_inode) { 1067 error = -EINVAL; 1068 break; 1069 } 1070 if (dir == sb->s_root->d_inode) { 1071 error = 0; 1072 break; 1073 } 1074 1075 tmp = gfs2_lookupi(dir, &dotdot, 1, NULL); 1076 if (IS_ERR(tmp)) { 1077 error = PTR_ERR(tmp); 1078 break; 1079 } 1080 1081 iput(dir); 1082 dir = tmp; 1083 } 1084 1085 iput(dir); 1086 1087 return error; 1088 } 1089 1090 /** 1091 * gfs2_readlinki - return the contents of a symlink 1092 * @ip: the symlink's inode 1093 * @buf: a pointer to the buffer to be filled 1094 * @len: a pointer to the length of @buf 1095 * 1096 * If @buf is too small, a piece of memory is kmalloc()ed and needs 1097 * to be freed by the caller. 
1098 * 1099 * Returns: errno 1100 */ 1101 1102 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) 1103 { 1104 struct gfs2_holder i_gh; 1105 struct buffer_head *dibh; 1106 unsigned int x; 1107 int error; 1108 1109 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); 1110 error = gfs2_glock_nq_atime(&i_gh); 1111 if (error) { 1112 gfs2_holder_uninit(&i_gh); 1113 return error; 1114 } 1115 1116 if (!ip->i_di.di_size) { 1117 gfs2_consist_inode(ip); 1118 error = -EIO; 1119 goto out; 1120 } 1121 1122 error = gfs2_meta_inode_buffer(ip, &dibh); 1123 if (error) 1124 goto out; 1125 1126 x = ip->i_di.di_size + 1; 1127 if (x > *len) { 1128 *buf = kmalloc(x, GFP_KERNEL); 1129 if (!*buf) { 1130 error = -ENOMEM; 1131 goto out_brelse; 1132 } 1133 } 1134 1135 memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); 1136 *len = x; 1137 1138 out_brelse: 1139 brelse(dibh); 1140 out: 1141 gfs2_glock_dq_uninit(&i_gh); 1142 return error; 1143 } 1144 1145 /** 1146 * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and 1147 * conditionally update the inode's atime 1148 * @gh: the holder to acquire 1149 * 1150 * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap 1151 * Update if the difference between the current time and the inode's current 1152 * atime is greater than an interval specified at mount. 
 *
 * @gh must have GL_ATIME set, must not have GL_ASYNC set, and must refer
 * to an inode glock; otherwise -EINVAL is returned without queuing it.
 *
 * Returns: errno
 */

int gfs2_glock_nq_atime(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_inode *ip = gl->gl_object;
	s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum);
	unsigned int state;
	int flags;
	int error;
	struct timespec tv = CURRENT_TIME;

	if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
	    gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
	    gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
		return -EINVAL;

	/* Remember what the caller asked for so it can be restored later */
	state = gh->gh_state;
	flags = gh->gh_flags;

	error = gfs2_glock_nq(gh);
	if (error)
		return error;

	/* No atime updates with SDF_NOATIME set or on a read-only mount */
	if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
	    (sdp->sd_vfs->s_flags & MS_RDONLY))
		return 0;

	if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
		/* Drop the lock and requeue it in the exclusive state */
		gfs2_glock_dq(gh);
		gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
				   gh);
		error = gfs2_glock_nq(gh);
		if (error)
			return error;

		/* Verify that atime hasn't been updated while we were
		   trying to get exclusive lock. */

		tv = CURRENT_TIME;
		if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
			struct buffer_head *dibh;
			struct gfs2_dinode *di;

			error = gfs2_trans_begin(sdp, RES_DINODE, 0);
			/* -EROFS: skip the update but keep the lock we hold */
			if (error == -EROFS)
				return 0;
			if (error)
				goto fail;

			error = gfs2_meta_inode_buffer(ip, &dibh);
			if (error)
				goto fail_end_trans;

			ip->i_inode.i_atime = tv;

			/* Only the two atime fields of the dinode are rewritten */
			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
			di = (struct gfs2_dinode *)dibh->b_data;
			di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
			di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
			brelse(dibh);

			gfs2_trans_end(sdp);
		}

		/* If someone else has asked for the glock,
		   unlock and let them have it. Then reacquire
		   in the original state. */
		if (gfs2_glock_is_blocking(gl)) {
			gfs2_glock_dq(gh);
			gfs2_holder_reinit(state, flags, gh);
			return gfs2_glock_nq(gh);
		}
	}

	return 0;

fail_end_trans:
	gfs2_trans_end(sdp);
fail:
	/* On these error paths the glock is released before returning */
	gfs2_glock_dq(gh);
	return error;
}

/*
 * __gfs2_setattr_simple - apply attribute changes and write the dinode
 * @ip: The GFS2 inode
 * @attr: The attribute changes to apply
 *
 * Does not start a transaction itself; gfs2_setattr_simple() below is the
 * wrapper that takes care of that.
 *
 * Returns: errno
 */
static int
__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
{
	struct buffer_head *dibh;
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		error = inode_setattr(&ip->i_inode, attr);
		/* A failing inode_setattr() only warns; the dinode is still written */
		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(ip, dibh->b_data);
		brelse(dibh);
	}
	return error;
}

/**
 * gfs2_setattr_simple - apply attribute changes to an inode
 * @ip: The GFS2 inode
 * @attr: The attribute changes to apply
 *
 * Called with a reference on the vnode.
 *
 * If current->journal_info is already set the change is made directly;
 * otherwise a RES_DINODE transaction is started around it.
 *
 * Returns: errno
 */

int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
{
	int error;

	if (current->journal_info)
		return __gfs2_setattr_simple(ip, attr);

	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0);
	if (error)
		return error;

	error = __gfs2_setattr_simple(ip, attr);
	gfs2_trans_end(GFS2_SB(&ip->i_inode));
	return error;
}

/*
 * gfs2_dinode_out - write the incore inode into an on-disk dinode
 * @ip: The GFS2 inode to read from
 * @buf: The destination buffer, filled in as a big-endian struct gfs2_dinode
 *
 * The metadata header is rewritten as well. Not every dinode field is
 * assigned here (di_major and di_minor, for example, are left as they are
 * in @buf).
 */
void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
{
	const struct gfs2_dinode_host *di = &ip->i_di;
	struct gfs2_dinode *str = buf;

	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
	str->di_header.__pad0 = 0;
	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
	str->di_header.__pad1 = 0;
	str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
	str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
	str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
	str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
	str->di_size = cpu_to_be64(di->di_size);
	str->di_blocks = cpu_to_be64(di->di_blocks);
	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);

	str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
	str->di_goal_data = cpu_to_be64(di->di_goal_data);
	str->di_generation = cpu_to_be64(di->di_generation);

	str->di_flags = cpu_to_be32(di->di_flags);
	str->di_height = cpu_to_be16(di->di_height);
	/* GFS2_FORMAT_DE only for directories without GFS2_DIF_EXHASH set */
	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
					     !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
					     GFS2_FORMAT_DE : 0);
	str->di_depth = cpu_to_be16(di->di_depth);
	str->di_entries = cpu_to_be32(di->di_entries);

	str->di_eattr = cpu_to_be64(di->di_eattr);
	str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
	str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
	str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
}

/* Print selected fields of the incore inode to the kernel log. */
void gfs2_dinode_print(const struct gfs2_inode *ip)
{
	const struct gfs2_dinode_host *di = &ip->i_di;

	printk(KERN_INFO " no_formal_ino = %llu\n",
	       (unsigned long long)ip->i_no_formal_ino);
	printk(KERN_INFO " no_addr = %llu\n",
	       (unsigned long long)ip->i_no_addr);
	printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size);
	printk(KERN_INFO " di_blocks = %llu\n",
	       (unsigned long long)di->di_blocks);
	printk(KERN_INFO " di_goal_meta = %llu\n",
	       (unsigned long long)di->di_goal_meta);
	printk(KERN_INFO " di_goal_data = %llu\n",
	       (unsigned long long)di->di_goal_data);
	printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags);
	printk(KERN_INFO " di_height = %u\n", di->di_height);
	printk(KERN_INFO " di_depth = %u\n", di->di_depth);
printk(KERN_INFO " di_entries = %u\n", di->di_entries); 1342 printk(KERN_INFO " di_eattr = %llu\n", 1343 (unsigned long long)di->di_eattr); 1344 } 1345 1346