1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/sched.h> 11 #include <linux/slab.h> 12 #include <linux/spinlock.h> 13 #include <linux/completion.h> 14 #include <linux/buffer_head.h> 15 #include <linux/posix_acl.h> 16 #include <linux/sort.h> 17 #include <linux/gfs2_ondisk.h> 18 #include <linux/crc32.h> 19 #include <linux/lm_interface.h> 20 #include <linux/security.h> 21 #include <linux/time.h> 22 23 #include "gfs2.h" 24 #include "incore.h" 25 #include "acl.h" 26 #include "bmap.h" 27 #include "dir.h" 28 #include "eattr.h" 29 #include "glock.h" 30 #include "glops.h" 31 #include "inode.h" 32 #include "log.h" 33 #include "meta_io.h" 34 #include "ops_address.h" 35 #include "quota.h" 36 #include "rgrp.h" 37 #include "trans.h" 38 #include "util.h" 39 40 struct gfs2_inum_range_host { 41 u64 ir_start; 42 u64 ir_length; 43 }; 44 45 static int iget_test(struct inode *inode, void *opaque) 46 { 47 struct gfs2_inode *ip = GFS2_I(inode); 48 u64 *no_addr = opaque; 49 50 if (ip->i_no_addr == *no_addr && test_bit(GIF_USER, &ip->i_flags)) 51 return 1; 52 53 return 0; 54 } 55 56 static int iget_set(struct inode *inode, void *opaque) 57 { 58 struct gfs2_inode *ip = GFS2_I(inode); 59 u64 *no_addr = opaque; 60 61 inode->i_ino = (unsigned long)*no_addr; 62 ip->i_no_addr = *no_addr; 63 set_bit(GIF_USER, &ip->i_flags); 64 return 0; 65 } 66 67 struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) 68 { 69 unsigned long hash = (unsigned long)no_addr; 70 return ilookup5(sb, hash, iget_test, &no_addr); 71 } 72 73 static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) 74 { 75 unsigned long hash = (unsigned long)no_addr; 76 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); 77 } 78 79 struct gfs2_skip_data { 80 u64 no_addr; 81 int skipped; 82 }; 83 84 static int iget_skip_test(struct inode *inode, void *opaque) 85 { 86 struct gfs2_inode *ip = GFS2_I(inode); 87 struct gfs2_skip_data *data = opaque; 88 89 if (ip->i_no_addr == data->no_addr && test_bit(GIF_USER, &ip->i_flags)){ 90 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ 91 data->skipped = 1; 92 return 0; 93 } 94 return 1; 95 } 96 return 0; 97 } 98 99 static int iget_skip_set(struct inode *inode, void *opaque) 100 { 101 struct gfs2_inode *ip = GFS2_I(inode); 102 struct gfs2_skip_data *data = opaque; 103 104 if (data->skipped) 105 return 1; 106 inode->i_ino = (unsigned long)(data->no_addr); 107 ip->i_no_addr = data->no_addr; 108 set_bit(GIF_USER, &ip->i_flags); 109 return 0; 110 } 111 112 static struct inode *gfs2_iget_skip(struct super_block *sb, 113 u64 no_addr) 114 { 115 struct gfs2_skip_data data; 116 unsigned long hash = (unsigned long)no_addr; 117 118 data.no_addr = no_addr; 119 data.skipped = 0; 120 return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data); 121 } 122 123 /** 124 * GFS2 lookup code fills in vfs inode contents based on info obtained 125 * from directory entry inside gfs2_inode_lookup(). This has caused issues 126 * with NFS code path since its get_dentry routine doesn't have the relevant 127 * directory entry when gfs2_inode_lookup() is invoked. Part of the code 128 * segment inside gfs2_inode_lookup code needs to get moved around. 129 * 130 * Clean up I_LOCK and I_NEW as well. 131 **/ 132 133 void gfs2_set_iop(struct inode *inode) 134 { 135 struct gfs2_sbd *sdp = GFS2_SB(inode); 136 umode_t mode = inode->i_mode; 137 138 if (S_ISREG(mode)) { 139 inode->i_op = &gfs2_file_iops; 140 if (sdp->sd_args.ar_localflocks) 141 inode->i_fop = &gfs2_file_fops_nolock; 142 else 143 inode->i_fop = &gfs2_file_fops; 144 } else if (S_ISDIR(mode)) { 145 inode->i_op = &gfs2_dir_iops; 146 if (sdp->sd_args.ar_localflocks) 147 inode->i_fop = &gfs2_dir_fops_nolock; 148 else 149 inode->i_fop = &gfs2_dir_fops; 150 } else if (S_ISLNK(mode)) { 151 inode->i_op = &gfs2_symlink_iops; 152 } else { 153 inode->i_op = &gfs2_file_iops; 154 init_special_inode(inode, inode->i_mode, inode->i_rdev); 155 } 156 157 unlock_new_inode(inode); 158 } 159 160 /** 161 * gfs2_inode_lookup - Lookup an inode 162 * @sb: The super block 163 * @no_addr: The inode number 164 * @type: The type of the inode 165 * @skip_freeing: set this not return an inode if it is currently being freed. 166 * 167 * Returns: A VFS inode, or an error 168 */ 169 170 struct inode *gfs2_inode_lookup(struct super_block *sb, 171 unsigned int type, 172 u64 no_addr, 173 u64 no_formal_ino, int skip_freeing) 174 { 175 struct inode *inode; 176 struct gfs2_inode *ip; 177 struct gfs2_glock *io_gl; 178 int error; 179 180 if (skip_freeing) 181 inode = gfs2_iget_skip(sb, no_addr); 182 else 183 inode = gfs2_iget(sb, no_addr); 184 ip = GFS2_I(inode); 185 186 if (!inode) 187 return ERR_PTR(-ENOBUFS); 188 189 if (inode->i_state & I_NEW) { 190 struct gfs2_sbd *sdp = GFS2_SB(inode); 191 ip->i_no_formal_ino = no_formal_ino; 192 193 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 194 if (unlikely(error)) 195 goto fail; 196 ip->i_gl->gl_object = ip; 197 198 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 199 if (unlikely(error)) 200 goto fail_put; 201 202 set_bit(GIF_INVALID, &ip->i_flags); 203 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 204 if (unlikely(error)) 205 goto fail_iopen; 206 ip->i_iopen_gh.gh_gl->gl_object = ip; 207 208 gfs2_glock_put(io_gl); 209 210 if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) 211 goto gfs2_nfsbypass; 212 213 inode->i_mode = DT2IF(type); 214 215 /* 216 * We must read the inode in order to work out its type in 217 * this case. Note that this doesn't happen often as we normally 218 * know the type beforehand. This code path only occurs during 219 * unlinked inode recovery (where it is safe to do this glock, 220 * which is not true in the general case). 221 */ 222 if (type == DT_UNKNOWN) { 223 struct gfs2_holder gh; 224 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 225 if (unlikely(error)) 226 goto fail_glock; 227 /* Inode is now uptodate */ 228 gfs2_glock_dq_uninit(&gh); 229 } 230 231 gfs2_set_iop(inode); 232 } 233 234 gfs2_nfsbypass: 235 return inode; 236 fail_glock: 237 gfs2_glock_dq(&ip->i_iopen_gh); 238 fail_iopen: 239 gfs2_glock_put(io_gl); 240 fail_put: 241 ip->i_gl->gl_object = NULL; 242 gfs2_glock_put(ip->i_gl); 243 fail: 244 iget_failed(inode); 245 return ERR_PTR(error); 246 } 247 248 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) 249 { 250 const struct gfs2_dinode *str = buf; 251 struct timespec atime; 252 u16 height, depth; 253 254 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) 255 goto corrupt; 256 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); 257 ip->i_inode.i_mode = be32_to_cpu(str->di_mode); 258 ip->i_inode.i_rdev = 0; 259 switch (ip->i_inode.i_mode & S_IFMT) { 260 case S_IFBLK: 261 case S_IFCHR: 262 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), 263 be32_to_cpu(str->di_minor)); 264 break; 265 }; 266 267 ip->i_inode.i_uid = be32_to_cpu(str->di_uid); 268 ip->i_inode.i_gid = be32_to_cpu(str->di_gid); 269 /* 270 * We will need to review setting the nlink count here in the 271 * light of the forthcoming ro bind mount work. This is a reminder 272 * to do that. 273 */ 274 ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); 275 ip->i_disksize = be64_to_cpu(str->di_size); 276 i_size_write(&ip->i_inode, ip->i_disksize); 277 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); 278 atime.tv_sec = be64_to_cpu(str->di_atime); 279 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); 280 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) 281 ip->i_inode.i_atime = atime; 282 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); 283 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); 284 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); 285 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); 286 287 ip->i_goal = be64_to_cpu(str->di_goal_meta); 288 ip->i_generation = be64_to_cpu(str->di_generation); 289 290 ip->i_diskflags = be32_to_cpu(str->di_flags); 291 gfs2_set_inode_flags(&ip->i_inode); 292 height = be16_to_cpu(str->di_height); 293 if (unlikely(height > GFS2_MAX_META_HEIGHT)) 294 goto corrupt; 295 ip->i_height = (u8)height; 296 297 depth = be16_to_cpu(str->di_depth); 298 if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) 299 goto corrupt; 300 ip->i_depth = (u8)depth; 301 ip->i_entries = be32_to_cpu(str->di_entries); 302 303 ip->i_eattr = be64_to_cpu(str->di_eattr); 304 if (S_ISREG(ip->i_inode.i_mode)) 305 gfs2_set_aops(&ip->i_inode); 306 307 return 0; 308 corrupt: 309 if (gfs2_consist_inode(ip)) 310 gfs2_dinode_print(ip); 311 return -EIO; 312 } 313 314 /** 315 * gfs2_inode_refresh - Refresh the incore copy of the dinode 316 * @ip: The GFS2 inode 317 * 318 * Returns: errno 319 */ 320 321 int gfs2_inode_refresh(struct gfs2_inode *ip) 322 { 323 struct buffer_head *dibh; 324 int error; 325 326 error = gfs2_meta_inode_buffer(ip, &dibh); 327 if (error) 328 return error; 329 330 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) { 331 brelse(dibh); 332 return -EIO; 333 } 334 335 error = gfs2_dinode_in(ip, dibh->b_data); 336 brelse(dibh); 337 clear_bit(GIF_INVALID, &ip->i_flags); 338 339 return error; 340 } 341 342 int gfs2_dinode_dealloc(struct gfs2_inode *ip) 343 { 344 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 345 struct gfs2_alloc *al; 346 struct gfs2_rgrpd *rgd; 347 int error; 348 349 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { 350 if (gfs2_consist_inode(ip)) 351 gfs2_dinode_print(ip); 352 return -EIO; 353 } 354 355 al = gfs2_alloc_get(ip); 356 if (!al) 357 return -ENOMEM; 358 359 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 360 if (error) 361 goto out; 362 363 error = gfs2_rindex_hold(sdp, &al->al_ri_gh); 364 if (error) 365 goto out_qs; 366 367 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 368 if (!rgd) { 369 gfs2_consist_inode(ip); 370 error = -EIO; 371 goto out_rindex_relse; 372 } 373 374 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, 375 &al->al_rgd_gh); 376 if (error) 377 goto out_rindex_relse; 378 379 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1); 380 if (error) 381 goto out_rg_gunlock; 382 383 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); 384 set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags); 385 386 gfs2_free_di(rgd, ip); 387 388 gfs2_trans_end(sdp); 389 390 out_rg_gunlock: 391 gfs2_glock_dq_uninit(&al->al_rgd_gh); 392 out_rindex_relse: 393 gfs2_glock_dq_uninit(&al->al_ri_gh); 394 out_qs: 395 gfs2_quota_unhold(ip); 396 out: 397 gfs2_alloc_put(ip); 398 return error; 399 } 400 401 /** 402 * gfs2_change_nlink - Change nlink count on inode 403 * @ip: The GFS2 inode 404 * @diff: The change in the nlink count required 405 * 406 * Returns: errno 407 */ 408 int gfs2_change_nlink(struct gfs2_inode *ip, int diff) 409 { 410 struct buffer_head *dibh; 411 u32 nlink; 412 int error; 413 414 BUG_ON(diff != 1 && diff != -1); 415 nlink = ip->i_inode.i_nlink + diff; 416 417 /* If we are reducing the nlink count, but the new value ends up being 418 bigger than the old one, we must have underflowed. */ 419 if (diff < 0 && nlink > ip->i_inode.i_nlink) { 420 if (gfs2_consist_inode(ip)) 421 gfs2_dinode_print(ip); 422 return -EIO; 423 } 424 425 error = gfs2_meta_inode_buffer(ip, &dibh); 426 if (error) 427 return error; 428 429 if (diff > 0) 430 inc_nlink(&ip->i_inode); 431 else 432 drop_nlink(&ip->i_inode); 433 434 ip->i_inode.i_ctime = CURRENT_TIME; 435 436 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 437 gfs2_dinode_out(ip, dibh->b_data); 438 brelse(dibh); 439 mark_inode_dirty(&ip->i_inode); 440 441 if (ip->i_inode.i_nlink == 0) 442 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ 443 444 return error; 445 } 446 447 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) 448 { 449 struct qstr qstr; 450 struct inode *inode; 451 gfs2_str2qstr(&qstr, name); 452 inode = gfs2_lookupi(dip, &qstr, 1); 453 /* gfs2_lookupi has inconsistent callers: vfs 454 * related routines expect NULL for no entry found, 455 * gfs2_lookup_simple callers expect ENOENT 456 * and do not check for NULL. 457 */ 458 if (inode == NULL) 459 return ERR_PTR(-ENOENT); 460 else 461 return inode; 462 } 463 464 465 /** 466 * gfs2_lookupi - Look up a filename in a directory and return its inode 467 * @d_gh: An initialized holder for the directory glock 468 * @name: The name of the inode to look for 469 * @is_root: If 1, ignore the caller's permissions 470 * @i_gh: An uninitialized holder for the new inode glock 471 * 472 * This can be called via the VFS filldir function when NFS is doing 473 * a readdirplus and the inode which its intending to stat isn't 474 * already in cache. In this case we must not take the directory glock 475 * again, since the readdir call will have already taken that lock. 476 * 477 * Returns: errno 478 */ 479 480 struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 481 int is_root) 482 { 483 struct super_block *sb = dir->i_sb; 484 struct gfs2_inode *dip = GFS2_I(dir); 485 struct gfs2_holder d_gh; 486 int error = 0; 487 struct inode *inode = NULL; 488 int unlock = 0; 489 490 if (!name->len || name->len > GFS2_FNAMESIZE) 491 return ERR_PTR(-ENAMETOOLONG); 492 493 if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) || 494 (name->len == 2 && memcmp(name->name, "..", 2) == 0 && 495 dir == sb->s_root->d_inode)) { 496 igrab(dir); 497 return dir; 498 } 499 500 if (gfs2_glock_is_locked_by_me(dip->i_gl) == NULL) { 501 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 502 if (error) 503 return ERR_PTR(error); 504 unlock = 1; 505 } 506 507 if (!is_root) { 508 error = gfs2_permission(dir, MAY_EXEC); 509 if (error) 510 goto out; 511 } 512 513 inode = gfs2_dir_search(dir, name); 514 if (IS_ERR(inode)) 515 error = PTR_ERR(inode); 516 out: 517 if (unlock) 518 gfs2_glock_dq_uninit(&d_gh); 519 if (error == -ENOENT) 520 return NULL; 521 return inode ? inode : ERR_PTR(error); 522 } 523 524 static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf) 525 { 526 const struct gfs2_inum_range *str = buf; 527 528 ir->ir_start = be64_to_cpu(str->ir_start); 529 ir->ir_length = be64_to_cpu(str->ir_length); 530 } 531 532 static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf) 533 { 534 struct gfs2_inum_range *str = buf; 535 536 str->ir_start = cpu_to_be64(ir->ir_start); 537 str->ir_length = cpu_to_be64(ir->ir_length); 538 } 539 540 static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) 541 { 542 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); 543 struct buffer_head *bh; 544 struct gfs2_inum_range_host ir; 545 int error; 546 547 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 548 if (error) 549 return error; 550 mutex_lock(&sdp->sd_inum_mutex); 551 552 error = gfs2_meta_inode_buffer(ip, &bh); 553 if (error) { 554 mutex_unlock(&sdp->sd_inum_mutex); 555 gfs2_trans_end(sdp); 556 return error; 557 } 558 559 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 560 561 if (ir.ir_length) { 562 *formal_ino = ir.ir_start++; 563 ir.ir_length--; 564 gfs2_trans_add_bh(ip->i_gl, bh, 1); 565 gfs2_inum_range_out(&ir, 566 bh->b_data + sizeof(struct gfs2_dinode)); 567 brelse(bh); 568 mutex_unlock(&sdp->sd_inum_mutex); 569 gfs2_trans_end(sdp); 570 return 0; 571 } 572 573 brelse(bh); 574 575 mutex_unlock(&sdp->sd_inum_mutex); 576 gfs2_trans_end(sdp); 577 578 return 1; 579 } 580 581 static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino) 582 { 583 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); 584 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode); 585 struct gfs2_holder gh; 586 struct buffer_head *bh; 587 struct gfs2_inum_range_host ir; 588 int error; 589 590 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 591 if (error) 592 return error; 593 594 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0); 595 if (error) 596 goto out; 597 mutex_lock(&sdp->sd_inum_mutex); 598 599 error = gfs2_meta_inode_buffer(ip, &bh); 600 if (error) 601 goto out_end_trans; 602 603 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 604 605 if (!ir.ir_length) { 606 struct buffer_head *m_bh; 607 u64 x, y; 608 __be64 z; 609 610 error = gfs2_meta_inode_buffer(m_ip, &m_bh); 611 if (error) 612 goto out_brelse; 613 614 z = *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)); 615 x = y = be64_to_cpu(z); 616 ir.ir_start = x; 617 ir.ir_length = GFS2_INUM_QUANTUM; 618 x += GFS2_INUM_QUANTUM; 619 if (x < y) 620 gfs2_consist_inode(m_ip); 621 z = cpu_to_be64(x); 622 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); 623 *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = z; 624 625 brelse(m_bh); 626 } 627 628 *formal_ino = ir.ir_start++; 629 ir.ir_length--; 630 631 gfs2_trans_add_bh(ip->i_gl, bh, 1); 632 gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 633 634 out_brelse: 635 brelse(bh); 636 out_end_trans: 637 mutex_unlock(&sdp->sd_inum_mutex); 638 gfs2_trans_end(sdp); 639 out: 640 gfs2_glock_dq_uninit(&gh); 641 return error; 642 } 643 644 static int pick_formal_ino(struct gfs2_sbd *sdp, u64 *inum) 645 { 646 int error; 647 648 error = pick_formal_ino_1(sdp, inum); 649 if (error <= 0) 650 return error; 651 652 error = pick_formal_ino_2(sdp, inum); 653 654 return error; 655 } 656 657 /** 658 * create_ok - OK to create a new on-disk inode here? 659 * @dip: Directory in which dinode is to be created 660 * @name: Name of new dinode 661 * @mode: 662 * 663 * Returns: errno 664 */ 665 666 static int create_ok(struct gfs2_inode *dip, const struct qstr *name, 667 unsigned int mode) 668 { 669 int error; 670 671 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 672 if (error) 673 return error; 674 675 /* Don't create entries in an unlinked directory */ 676 if (!dip->i_inode.i_nlink) 677 return -EPERM; 678 679 error = gfs2_dir_check(&dip->i_inode, name, NULL); 680 switch (error) { 681 case -ENOENT: 682 error = 0; 683 break; 684 case 0: 685 return -EEXIST; 686 default: 687 return error; 688 } 689 690 if (dip->i_entries == (u32)-1) 691 return -EFBIG; 692 if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) 693 return -EMLINK; 694 695 return 0; 696 } 697 698 static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, 699 unsigned int *uid, unsigned int *gid) 700 { 701 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 702 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { 703 if (S_ISDIR(*mode)) 704 *mode |= S_ISUID; 705 else if (dip->i_inode.i_uid != current_fsuid()) 706 *mode &= ~07111; 707 *uid = dip->i_inode.i_uid; 708 } else 709 *uid = current_fsuid(); 710 711 if (dip->i_inode.i_mode & S_ISGID) { 712 if (S_ISDIR(*mode)) 713 *mode |= S_ISGID; 714 *gid = dip->i_inode.i_gid; 715 } else 716 *gid = current_fsgid(); 717 } 718 719 static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) 720 { 721 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 722 int error; 723 724 if (gfs2_alloc_get(dip) == NULL) 725 return -ENOMEM; 726 727 dip->i_alloc->al_requested = RES_DINODE; 728 error = gfs2_inplace_reserve(dip); 729 if (error) 730 goto out; 731 732 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0); 733 if (error) 734 goto out_ipreserv; 735 736 *no_addr = gfs2_alloc_di(dip, generation); 737 738 gfs2_trans_end(sdp); 739 740 out_ipreserv: 741 gfs2_inplace_release(dip); 742 out: 743 gfs2_alloc_put(dip); 744 return error; 745 } 746 747 /** 748 * init_dinode - Fill in a new dinode structure 749 * @dip: the directory this inode is being created in 750 * @gl: The glock covering the new inode 751 * @inum: the inode number 752 * @mode: the file permissions 753 * @uid: 754 * @gid: 755 * 756 */ 757 758 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 759 const struct gfs2_inum_host *inum, unsigned int mode, 760 unsigned int uid, unsigned int gid, 761 const u64 *generation, dev_t dev, struct buffer_head **bhp) 762 { 763 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 764 struct gfs2_dinode *di; 765 struct buffer_head *dibh; 766 struct timespec tv = CURRENT_TIME; 767 768 dibh = gfs2_meta_new(gl, inum->no_addr); 769 gfs2_trans_add_bh(gl, dibh, 1); 770 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 771 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 772 di = (struct gfs2_dinode *)dibh->b_data; 773 774 di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino); 775 di->di_num.no_addr = cpu_to_be64(inum->no_addr); 776 di->di_mode = cpu_to_be32(mode); 777 di->di_uid = cpu_to_be32(uid); 778 di->di_gid = cpu_to_be32(gid); 779 di->di_nlink = 0; 780 di->di_size = 0; 781 di->di_blocks = cpu_to_be64(1); 782 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 783 di->di_major = cpu_to_be32(MAJOR(dev)); 784 di->di_minor = cpu_to_be32(MINOR(dev)); 785 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 786 di->di_generation = cpu_to_be64(*generation); 787 di->di_flags = 0; 788 789 if (S_ISREG(mode)) { 790 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || 791 gfs2_tune_get(sdp, gt_new_files_jdata)) 792 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); 793 } else if (S_ISDIR(mode)) { 794 di->di_flags |= cpu_to_be32(dip->i_diskflags & 795 GFS2_DIF_INHERIT_JDATA); 796 } 797 798 di->__pad1 = 0; 799 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); 800 di->di_height = 0; 801 di->__pad2 = 0; 802 di->__pad3 = 0; 803 di->di_depth = 0; 804 di->di_entries = 0; 805 memset(&di->__pad4, 0, sizeof(di->__pad4)); 806 di->di_eattr = 0; 807 di->di_atime_nsec = cpu_to_be32(tv.tv_nsec); 808 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 809 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 810 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 811 812 set_buffer_uptodate(dibh); 813 814 *bhp = dibh; 815 } 816 817 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 818 unsigned int mode, const struct gfs2_inum_host *inum, 819 const u64 *generation, dev_t dev, struct buffer_head **bhp) 820 { 821 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 822 unsigned int uid, gid; 823 int error; 824 825 munge_mode_uid_gid(dip, &mode, &uid, &gid); 826 if (!gfs2_alloc_get(dip)) 827 return -ENOMEM; 828 829 error = gfs2_quota_lock(dip, uid, gid); 830 if (error) 831 goto out; 832 833 error = gfs2_quota_check(dip, uid, gid); 834 if (error) 835 goto out_quota; 836 837 error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0); 838 if (error) 839 goto out_quota; 840 841 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); 842 gfs2_quota_change(dip, +1, uid, gid); 843 gfs2_trans_end(sdp); 844 845 out_quota: 846 gfs2_quota_unlock(dip); 847 out: 848 gfs2_alloc_put(dip); 849 return error; 850 } 851 852 static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, 853 struct gfs2_inode *ip) 854 { 855 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 856 struct gfs2_alloc *al; 857 int alloc_required; 858 struct buffer_head *dibh; 859 int error; 860 861 al = gfs2_alloc_get(dip); 862 if (!al) 863 return -ENOMEM; 864 865 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 866 if (error) 867 goto fail; 868 869 error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); 870 if (alloc_required < 0) 871 goto fail_quota_locks; 872 if (alloc_required) { 873 error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); 874 if (error) 875 goto fail_quota_locks; 876 877 al->al_requested = sdp->sd_max_dirres; 878 879 error = gfs2_inplace_reserve(dip); 880 if (error) 881 goto fail_quota_locks; 882 883 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 884 al->al_rgd->rd_length + 885 2 * RES_DINODE + 886 RES_STATFS + RES_QUOTA, 0); 887 if (error) 888 goto fail_ipreserv; 889 } else { 890 error = gfs2_trans_begin(sdp, RES_LEAF + 2 * RES_DINODE, 0); 891 if (error) 892 goto fail_quota_locks; 893 } 894 895 error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode)); 896 if (error) 897 goto fail_end_trans; 898 899 error = gfs2_meta_inode_buffer(ip, &dibh); 900 if (error) 901 goto fail_end_trans; 902 ip->i_inode.i_nlink = 1; 903 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 904 gfs2_dinode_out(ip, dibh->b_data); 905 brelse(dibh); 906 return 0; 907 908 fail_end_trans: 909 gfs2_trans_end(sdp); 910 911 fail_ipreserv: 912 if (dip->i_alloc->al_rgd) 913 gfs2_inplace_release(dip); 914 915 fail_quota_locks: 916 gfs2_quota_unlock(dip); 917 918 fail: 919 gfs2_alloc_put(dip); 920 return error; 921 } 922 923 static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) 924 { 925 int err; 926 size_t len; 927 void *value; 928 char *name; 929 struct gfs2_ea_request er; 930 931 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, 932 &name, &value, &len); 933 934 if (err) { 935 if (err == -EOPNOTSUPP) 936 return 0; 937 return err; 938 } 939 940 memset(&er, 0, sizeof(struct gfs2_ea_request)); 941 942 er.er_type = GFS2_EATYPE_SECURITY; 943 er.er_name = name; 944 er.er_data = value; 945 er.er_name_len = strlen(name); 946 er.er_data_len = len; 947 948 err = gfs2_ea_set_i(ip, &er); 949 950 kfree(value); 951 kfree(name); 952 953 return err; 954 } 955 956 /** 957 * gfs2_createi - Create a new inode 958 * @ghs: An array of two holders 959 * @name: The name of the new file 960 * @mode: the permissions on the new inode 961 * 962 * @ghs[0] is an initialized holder for the directory 963 * @ghs[1] is the holder for the inode lock 964 * 965 * If the return value is not NULL, the glocks on both the directory and the new 966 * file are held. A transaction has been started and an inplace reservation 967 * is held, as well. 968 * 969 * Returns: An inode 970 */ 971 972 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 973 unsigned int mode, dev_t dev) 974 { 975 struct inode *inode = NULL; 976 struct gfs2_inode *dip = ghs->gh_gl->gl_object; 977 struct inode *dir = &dip->i_inode; 978 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 979 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 980 int error; 981 u64 generation; 982 struct buffer_head *bh = NULL; 983 984 if (!name->len || name->len > GFS2_FNAMESIZE) 985 return ERR_PTR(-ENAMETOOLONG); 986 987 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); 988 error = gfs2_glock_nq(ghs); 989 if (error) 990 goto fail; 991 992 error = create_ok(dip, name, mode); 993 if (error) 994 goto fail_gunlock; 995 996 error = pick_formal_ino(sdp, &inum.no_formal_ino); 997 if (error) 998 goto fail_gunlock; 999 1000 error = alloc_dinode(dip, &inum.no_addr, &generation); 1001 if (error) 1002 goto fail_gunlock; 1003 1004 error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, 1005 LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 1006 if (error) 1007 goto fail_gunlock; 1008 1009 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); 1010 if (error) 1011 goto fail_gunlock2; 1012 1013 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), 1014 inum.no_addr, 1015 inum.no_formal_ino, 0); 1016 if (IS_ERR(inode)) 1017 goto fail_gunlock2; 1018 1019 error = gfs2_inode_refresh(GFS2_I(inode)); 1020 if (error) 1021 goto fail_gunlock2; 1022 1023 error = gfs2_acl_create(dip, GFS2_I(inode)); 1024 if (error) 1025 goto fail_gunlock2; 1026 1027 error = gfs2_security_init(dip, GFS2_I(inode)); 1028 if (error) 1029 goto fail_gunlock2; 1030 1031 error = link_dinode(dip, name, GFS2_I(inode)); 1032 if (error) 1033 goto fail_gunlock2; 1034 1035 if (bh) 1036 brelse(bh); 1037 return inode; 1038 1039 fail_gunlock2: 1040 gfs2_glock_dq_uninit(ghs + 1); 1041 if (inode && !IS_ERR(inode)) 1042 iput(inode); 1043 fail_gunlock: 1044 gfs2_glock_dq(ghs); 1045 fail: 1046 if (bh) 1047 brelse(bh); 1048 return ERR_PTR(error); 1049 } 1050 1051 /** 1052 * gfs2_rmdiri - Remove a directory 1053 * @dip: The parent directory of the directory to be removed 1054 * @name: The name of the directory to be removed 1055 * @ip: The GFS2 inode of the directory to be removed 1056 * 1057 * Assumes Glocks on dip and ip are held 1058 * 1059 * Returns: errno 1060 */ 1061 1062 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, 1063 struct gfs2_inode *ip) 1064 { 1065 struct qstr dotname; 1066 int error; 1067 1068 if (ip->i_entries != 2) { 1069 if (gfs2_consist_inode(ip)) 1070 gfs2_dinode_print(ip); 1071 return -EIO; 1072 } 1073 1074 error = gfs2_dir_del(dip, name); 1075 if (error) 1076 return error; 1077 1078 error = gfs2_change_nlink(dip, -1); 1079 if (error) 1080 return error; 1081 1082 gfs2_str2qstr(&dotname, "."); 1083 error = gfs2_dir_del(ip, &dotname); 1084 if (error) 1085 return error; 1086 1087 gfs2_str2qstr(&dotname, ".."); 1088 error = gfs2_dir_del(ip, &dotname); 1089 if (error) 1090 return error; 1091 1092 /* It looks odd, but it really should be done twice */ 1093 error = gfs2_change_nlink(ip, -1); 1094 if (error) 1095 return error; 1096 1097 error = gfs2_change_nlink(ip, -1); 1098 if (error) 1099 return error; 1100 1101 return error; 1102 } 1103 1104 /* 1105 * gfs2_unlink_ok - check to see that a inode is still in a directory 1106 * @dip: the directory 1107 * @name: the name of the file 1108 * @ip: the inode 1109 * 1110 * Assumes that the lock on (at least) @dip is held. 1111 * 1112 * Returns: 0 if the parent/child relationship is correct, errno if it isn't 1113 */ 1114 1115 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, 1116 const struct gfs2_inode *ip) 1117 { 1118 int error; 1119 1120 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) 1121 return -EPERM; 1122 1123 if ((dip->i_inode.i_mode & S_ISVTX) && 1124 dip->i_inode.i_uid != current_fsuid() && 1125 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) 1126 return -EPERM; 1127 1128 if (IS_APPEND(&dip->i_inode)) 1129 return -EPERM; 1130 1131 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 1132 if (error) 1133 return error; 1134 1135 error = gfs2_dir_check(&dip->i_inode, name, ip); 1136 if (error) 1137 return error; 1138 1139 return 0; 1140 } 1141 1142 /** 1143 * gfs2_readlinki - return the contents of a symlink 1144 * @ip: the symlink's inode 1145 * @buf: a pointer to the buffer to be filled 1146 * @len: a pointer to the length of @buf 1147 * 1148 * If @buf is too small, a piece of memory is kmalloc()ed and needs 1149 * to be freed by the caller. 1150 * 1151 * Returns: errno 1152 */ 1153 1154 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) 1155 { 1156 struct gfs2_holder i_gh; 1157 struct buffer_head *dibh; 1158 unsigned int x; 1159 int error; 1160 1161 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); 1162 error = gfs2_glock_nq(&i_gh); 1163 if (error) { 1164 gfs2_holder_uninit(&i_gh); 1165 return error; 1166 } 1167 1168 if (!ip->i_disksize) { 1169 gfs2_consist_inode(ip); 1170 error = -EIO; 1171 goto out; 1172 } 1173 1174 error = gfs2_meta_inode_buffer(ip, &dibh); 1175 if (error) 1176 goto out; 1177 1178 x = ip->i_disksize + 1; 1179 if (x > *len) { 1180 *buf = kmalloc(x, GFP_NOFS); 1181 if (!*buf) { 1182 error = -ENOMEM; 1183 goto out_brelse; 1184 } 1185 } 1186 1187 memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); 1188 *len = x; 1189 1190 out_brelse: 1191 brelse(dibh); 1192 out: 1193 gfs2_glock_dq_uninit(&i_gh); 1194 return error; 1195 } 1196 1197 static int 1198 __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1199 { 1200 struct buffer_head *dibh; 1201 int error; 1202 1203 error = gfs2_meta_inode_buffer(ip, &dibh); 1204 if (!error) { 1205 error = inode_setattr(&ip->i_inode, attr); 1206 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); 1207 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1208 gfs2_dinode_out(ip, dibh->b_data); 1209 brelse(dibh); 1210 } 1211 return error; 1212 } 1213 1214 /** 1215 * gfs2_setattr_simple - 1216 * @ip: 1217 * @attr: 1218 * 1219 * Called with a reference on the vnode. 1220 * 1221 * Returns: errno 1222 */ 1223 1224 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1225 { 1226 int error; 1227 1228 if (current->journal_info) 1229 return __gfs2_setattr_simple(ip, attr); 1230 1231 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0); 1232 if (error) 1233 return error; 1234 1235 error = __gfs2_setattr_simple(ip, attr); 1236 gfs2_trans_end(GFS2_SB(&ip->i_inode)); 1237 return error; 1238 } 1239 1240 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) 1241 { 1242 struct gfs2_dinode *str = buf; 1243 1244 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 1245 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); 1246 str->di_header.__pad0 = 0; 1247 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); 1248 str->di_header.__pad1 = 0; 1249 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 1250 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 1251 str->di_mode = cpu_to_be32(ip->i_inode.i_mode); 1252 str->di_uid = cpu_to_be32(ip->i_inode.i_uid); 1253 str->di_gid = cpu_to_be32(ip->i_inode.i_gid); 1254 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); 1255 str->di_size = cpu_to_be64(ip->i_disksize); 1256 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); 1257 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); 1258 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); 1259 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); 1260 1261 str->di_goal_meta = cpu_to_be64(ip->i_goal); 1262 str->di_goal_data = cpu_to_be64(ip->i_goal); 1263 str->di_generation = cpu_to_be64(ip->i_generation); 1264 1265 str->di_flags = cpu_to_be32(ip->i_diskflags); 1266 str->di_height = cpu_to_be16(ip->i_height); 1267 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && 1268 !(ip->i_diskflags & GFS2_DIF_EXHASH) ? 1269 GFS2_FORMAT_DE : 0); 1270 str->di_depth = cpu_to_be16(ip->i_depth); 1271 str->di_entries = cpu_to_be32(ip->i_entries); 1272 1273 str->di_eattr = cpu_to_be64(ip->i_eattr); 1274 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); 1275 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); 1276 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); 1277 } 1278 1279 void gfs2_dinode_print(const struct gfs2_inode *ip) 1280 { 1281 printk(KERN_INFO " no_formal_ino = %llu\n", 1282 (unsigned long long)ip->i_no_formal_ino); 1283 printk(KERN_INFO " no_addr = %llu\n", 1284 (unsigned long long)ip->i_no_addr); 1285 printk(KERN_INFO " i_disksize = %llu\n", 1286 (unsigned long long)ip->i_disksize); 1287 printk(KERN_INFO " blocks = %llu\n", 1288 (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); 1289 printk(KERN_INFO " i_goal = %llu\n", 1290 (unsigned long long)ip->i_goal); 1291 printk(KERN_INFO " i_diskflags = 0x%.8X\n", ip->i_diskflags); 1292 printk(KERN_INFO " i_height = %u\n", ip->i_height); 1293 printk(KERN_INFO " i_depth = %u\n", ip->i_depth); 1294 printk(KERN_INFO " i_entries = %u\n", ip->i_entries); 1295 printk(KERN_INFO " i_eattr = %llu\n", 1296 (unsigned long long)ip->i_eattr); 1297 } 1298 1299