1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/sched.h> 11 #include <linux/slab.h> 12 #include <linux/spinlock.h> 13 #include <linux/completion.h> 14 #include <linux/buffer_head.h> 15 #include <linux/posix_acl.h> 16 #include <linux/sort.h> 17 #include <linux/gfs2_ondisk.h> 18 #include <linux/crc32.h> 19 #include <linux/lm_interface.h> 20 #include <linux/security.h> 21 #include <linux/time.h> 22 23 #include "gfs2.h" 24 #include "incore.h" 25 #include "acl.h" 26 #include "bmap.h" 27 #include "dir.h" 28 #include "eattr.h" 29 #include "glock.h" 30 #include "glops.h" 31 #include "inode.h" 32 #include "log.h" 33 #include "meta_io.h" 34 #include "ops_address.h" 35 #include "quota.h" 36 #include "rgrp.h" 37 #include "trans.h" 38 #include "util.h" 39 40 struct gfs2_inum_range_host { 41 u64 ir_start; 42 u64 ir_length; 43 }; 44 45 static int iget_test(struct inode *inode, void *opaque) 46 { 47 struct gfs2_inode *ip = GFS2_I(inode); 48 u64 *no_addr = opaque; 49 50 if (ip->i_no_addr == *no_addr && test_bit(GIF_USER, &ip->i_flags)) 51 return 1; 52 53 return 0; 54 } 55 56 static int iget_set(struct inode *inode, void *opaque) 57 { 58 struct gfs2_inode *ip = GFS2_I(inode); 59 u64 *no_addr = opaque; 60 61 inode->i_ino = (unsigned long)*no_addr; 62 ip->i_no_addr = *no_addr; 63 set_bit(GIF_USER, &ip->i_flags); 64 return 0; 65 } 66 67 struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) 68 { 69 unsigned long hash = (unsigned long)no_addr; 70 return ilookup5(sb, hash, iget_test, &no_addr); 71 } 72 73 static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) 74 { 75 unsigned long hash = (unsigned long)no_addr; 76 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); 77 } 78 79 struct gfs2_skip_data { 80 u64 no_addr; 81 int skipped; 82 }; 83 84 static int iget_skip_test(struct inode *inode, void *opaque) 85 { 86 struct gfs2_inode *ip = GFS2_I(inode); 87 struct gfs2_skip_data *data = opaque; 88 89 if (ip->i_no_addr == data->no_addr && test_bit(GIF_USER, &ip->i_flags)){ 90 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ 91 data->skipped = 1; 92 return 0; 93 } 94 return 1; 95 } 96 return 0; 97 } 98 99 static int iget_skip_set(struct inode *inode, void *opaque) 100 { 101 struct gfs2_inode *ip = GFS2_I(inode); 102 struct gfs2_skip_data *data = opaque; 103 104 if (data->skipped) 105 return 1; 106 inode->i_ino = (unsigned long)(data->no_addr); 107 ip->i_no_addr = data->no_addr; 108 set_bit(GIF_USER, &ip->i_flags); 109 return 0; 110 } 111 112 static struct inode *gfs2_iget_skip(struct super_block *sb, 113 u64 no_addr) 114 { 115 struct gfs2_skip_data data; 116 unsigned long hash = (unsigned long)no_addr; 117 118 data.no_addr = no_addr; 119 data.skipped = 0; 120 return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data); 121 } 122 123 /** 124 * GFS2 lookup code fills in vfs inode contents based on info obtained 125 * from directory entry inside gfs2_inode_lookup(). This has caused issues 126 * with NFS code path since its get_dentry routine doesn't have the relevant 127 * directory entry when gfs2_inode_lookup() is invoked. Part of the code 128 * segment inside gfs2_inode_lookup code needs to get moved around. 129 * 130 * Clean up I_LOCK and I_NEW as well. 131 **/ 132 133 void gfs2_set_iop(struct inode *inode) 134 { 135 struct gfs2_sbd *sdp = GFS2_SB(inode); 136 umode_t mode = inode->i_mode; 137 138 if (S_ISREG(mode)) { 139 inode->i_op = &gfs2_file_iops; 140 if (sdp->sd_args.ar_localflocks) 141 inode->i_fop = &gfs2_file_fops_nolock; 142 else 143 inode->i_fop = &gfs2_file_fops; 144 } else if (S_ISDIR(mode)) { 145 inode->i_op = &gfs2_dir_iops; 146 if (sdp->sd_args.ar_localflocks) 147 inode->i_fop = &gfs2_dir_fops_nolock; 148 else 149 inode->i_fop = &gfs2_dir_fops; 150 } else if (S_ISLNK(mode)) { 151 inode->i_op = &gfs2_symlink_iops; 152 } else { 153 inode->i_op = &gfs2_file_iops; 154 init_special_inode(inode, inode->i_mode, inode->i_rdev); 155 } 156 157 unlock_new_inode(inode); 158 } 159 160 /** 161 * gfs2_inode_lookup - Lookup an inode 162 * @sb: The super block 163 * @no_addr: The inode number 164 * @type: The type of the inode 165 * @skip_freeing: set this not return an inode if it is currently being freed. 166 * 167 * Returns: A VFS inode, or an error 168 */ 169 170 struct inode *gfs2_inode_lookup(struct super_block *sb, 171 unsigned int type, 172 u64 no_addr, 173 u64 no_formal_ino, int skip_freeing) 174 { 175 struct inode *inode; 176 struct gfs2_inode *ip; 177 struct gfs2_glock *io_gl; 178 int error; 179 180 if (skip_freeing) 181 inode = gfs2_iget_skip(sb, no_addr); 182 else 183 inode = gfs2_iget(sb, no_addr); 184 ip = GFS2_I(inode); 185 186 if (!inode) 187 return ERR_PTR(-ENOBUFS); 188 189 if (inode->i_state & I_NEW) { 190 struct gfs2_sbd *sdp = GFS2_SB(inode); 191 ip->i_no_formal_ino = no_formal_ino; 192 193 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 194 if (unlikely(error)) 195 goto fail; 196 ip->i_gl->gl_object = ip; 197 198 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 199 if (unlikely(error)) 200 goto fail_put; 201 202 set_bit(GIF_INVALID, &ip->i_flags); 203 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 204 if (unlikely(error)) 205 goto fail_iopen; 206 ip->i_iopen_gh.gh_gl->gl_object = ip; 207 208 gfs2_glock_put(io_gl); 209 210 if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) 211 goto gfs2_nfsbypass; 212 213 inode->i_mode = DT2IF(type); 214 215 /* 216 * We must read the inode in order to work out its type in 217 * this case. Note that this doesn't happen often as we normally 218 * know the type beforehand. This code path only occurs during 219 * unlinked inode recovery (where it is safe to do this glock, 220 * which is not true in the general case). 221 */ 222 if (type == DT_UNKNOWN) { 223 struct gfs2_holder gh; 224 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 225 if (unlikely(error)) 226 goto fail_glock; 227 /* Inode is now uptodate */ 228 gfs2_glock_dq_uninit(&gh); 229 } 230 231 gfs2_set_iop(inode); 232 } 233 234 gfs2_nfsbypass: 235 return inode; 236 fail_glock: 237 gfs2_glock_dq(&ip->i_iopen_gh); 238 fail_iopen: 239 gfs2_glock_put(io_gl); 240 fail_put: 241 ip->i_gl->gl_object = NULL; 242 gfs2_glock_put(ip->i_gl); 243 fail: 244 iget_failed(inode); 245 return ERR_PTR(error); 246 } 247 248 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) 249 { 250 struct gfs2_dinode_host *di = &ip->i_di; 251 const struct gfs2_dinode *str = buf; 252 struct timespec atime; 253 u16 height, depth; 254 255 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) 256 goto corrupt; 257 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); 258 ip->i_inode.i_mode = be32_to_cpu(str->di_mode); 259 ip->i_inode.i_rdev = 0; 260 switch (ip->i_inode.i_mode & S_IFMT) { 261 case S_IFBLK: 262 case S_IFCHR: 263 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), 264 be32_to_cpu(str->di_minor)); 265 break; 266 }; 267 268 ip->i_inode.i_uid = be32_to_cpu(str->di_uid); 269 ip->i_inode.i_gid = be32_to_cpu(str->di_gid); 270 /* 271 * We will need to review setting the nlink count here in the 272 * light of the forthcoming ro bind mount work. This is a reminder 273 * to do that. 274 */ 275 ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); 276 di->di_size = be64_to_cpu(str->di_size); 277 i_size_write(&ip->i_inode, di->di_size); 278 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); 279 atime.tv_sec = be64_to_cpu(str->di_atime); 280 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); 281 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) 282 ip->i_inode.i_atime = atime; 283 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); 284 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); 285 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); 286 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); 287 288 ip->i_goal = be64_to_cpu(str->di_goal_meta); 289 di->di_generation = be64_to_cpu(str->di_generation); 290 291 di->di_flags = be32_to_cpu(str->di_flags); 292 gfs2_set_inode_flags(&ip->i_inode); 293 height = be16_to_cpu(str->di_height); 294 if (unlikely(height > GFS2_MAX_META_HEIGHT)) 295 goto corrupt; 296 ip->i_height = (u8)height; 297 298 depth = be16_to_cpu(str->di_depth); 299 if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) 300 goto corrupt; 301 ip->i_depth = (u8)depth; 302 di->di_entries = be32_to_cpu(str->di_entries); 303 304 di->di_eattr = be64_to_cpu(str->di_eattr); 305 if (S_ISREG(ip->i_inode.i_mode)) 306 gfs2_set_aops(&ip->i_inode); 307 308 return 0; 309 corrupt: 310 if (gfs2_consist_inode(ip)) 311 gfs2_dinode_print(ip); 312 return -EIO; 313 } 314 315 /** 316 * gfs2_inode_refresh - Refresh the incore copy of the dinode 317 * @ip: The GFS2 inode 318 * 319 * Returns: errno 320 */ 321 322 int gfs2_inode_refresh(struct gfs2_inode *ip) 323 { 324 struct buffer_head *dibh; 325 int error; 326 327 error = gfs2_meta_inode_buffer(ip, &dibh); 328 if (error) 329 return error; 330 331 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) { 332 brelse(dibh); 333 return -EIO; 334 } 335 336 error = gfs2_dinode_in(ip, dibh->b_data); 337 brelse(dibh); 338 clear_bit(GIF_INVALID, &ip->i_flags); 339 340 return error; 341 } 342 343 int gfs2_dinode_dealloc(struct gfs2_inode *ip) 344 { 345 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 346 struct gfs2_alloc *al; 347 struct gfs2_rgrpd *rgd; 348 int error; 349 350 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { 351 if (gfs2_consist_inode(ip)) 352 gfs2_dinode_print(ip); 353 return -EIO; 354 } 355 356 al = gfs2_alloc_get(ip); 357 if (!al) 358 return -ENOMEM; 359 360 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 361 if (error) 362 goto out; 363 364 error = gfs2_rindex_hold(sdp, &al->al_ri_gh); 365 if (error) 366 goto out_qs; 367 368 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 369 if (!rgd) { 370 gfs2_consist_inode(ip); 371 error = -EIO; 372 goto out_rindex_relse; 373 } 374 375 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, 376 &al->al_rgd_gh); 377 if (error) 378 goto out_rindex_relse; 379 380 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1); 381 if (error) 382 goto out_rg_gunlock; 383 384 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); 385 set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags); 386 387 gfs2_free_di(rgd, ip); 388 389 gfs2_trans_end(sdp); 390 clear_bit(GLF_STICKY, &ip->i_gl->gl_flags); 391 392 out_rg_gunlock: 393 gfs2_glock_dq_uninit(&al->al_rgd_gh); 394 out_rindex_relse: 395 gfs2_glock_dq_uninit(&al->al_ri_gh); 396 out_qs: 397 gfs2_quota_unhold(ip); 398 out: 399 gfs2_alloc_put(ip); 400 return error; 401 } 402 403 /** 404 * gfs2_change_nlink - Change nlink count on inode 405 * @ip: The GFS2 inode 406 * @diff: The change in the nlink count required 407 * 408 * Returns: errno 409 */ 410 int gfs2_change_nlink(struct gfs2_inode *ip, int diff) 411 { 412 struct buffer_head *dibh; 413 u32 nlink; 414 int error; 415 416 BUG_ON(diff != 1 && diff != -1); 417 nlink = ip->i_inode.i_nlink + diff; 418 419 /* If we are reducing the nlink count, but the new value ends up being 420 bigger than the old one, we must have underflowed. */ 421 if (diff < 0 && nlink > ip->i_inode.i_nlink) { 422 if (gfs2_consist_inode(ip)) 423 gfs2_dinode_print(ip); 424 return -EIO; 425 } 426 427 error = gfs2_meta_inode_buffer(ip, &dibh); 428 if (error) 429 return error; 430 431 if (diff > 0) 432 inc_nlink(&ip->i_inode); 433 else 434 drop_nlink(&ip->i_inode); 435 436 ip->i_inode.i_ctime = CURRENT_TIME; 437 438 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 439 gfs2_dinode_out(ip, dibh->b_data); 440 brelse(dibh); 441 mark_inode_dirty(&ip->i_inode); 442 443 if (ip->i_inode.i_nlink == 0) 444 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ 445 446 return error; 447 } 448 449 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) 450 { 451 struct qstr qstr; 452 struct inode *inode; 453 gfs2_str2qstr(&qstr, name); 454 inode = gfs2_lookupi(dip, &qstr, 1); 455 /* gfs2_lookupi has inconsistent callers: vfs 456 * related routines expect NULL for no entry found, 457 * gfs2_lookup_simple callers expect ENOENT 458 * and do not check for NULL. 459 */ 460 if (inode == NULL) 461 return ERR_PTR(-ENOENT); 462 else 463 return inode; 464 } 465 466 467 /** 468 * gfs2_lookupi - Look up a filename in a directory and return its inode 469 * @d_gh: An initialized holder for the directory glock 470 * @name: The name of the inode to look for 471 * @is_root: If 1, ignore the caller's permissions 472 * @i_gh: An uninitialized holder for the new inode glock 473 * 474 * This can be called via the VFS filldir function when NFS is doing 475 * a readdirplus and the inode which its intending to stat isn't 476 * already in cache. In this case we must not take the directory glock 477 * again, since the readdir call will have already taken that lock. 478 * 479 * Returns: errno 480 */ 481 482 struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 483 int is_root) 484 { 485 struct super_block *sb = dir->i_sb; 486 struct gfs2_inode *dip = GFS2_I(dir); 487 struct gfs2_holder d_gh; 488 int error = 0; 489 struct inode *inode = NULL; 490 int unlock = 0; 491 492 if (!name->len || name->len > GFS2_FNAMESIZE) 493 return ERR_PTR(-ENAMETOOLONG); 494 495 if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) || 496 (name->len == 2 && memcmp(name->name, "..", 2) == 0 && 497 dir == sb->s_root->d_inode)) { 498 igrab(dir); 499 return dir; 500 } 501 502 if (gfs2_glock_is_locked_by_me(dip->i_gl) == NULL) { 503 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 504 if (error) 505 return ERR_PTR(error); 506 unlock = 1; 507 } 508 509 if (!is_root) { 510 error = gfs2_permission(dir, MAY_EXEC); 511 if (error) 512 goto out; 513 } 514 515 inode = gfs2_dir_search(dir, name); 516 if (IS_ERR(inode)) 517 error = PTR_ERR(inode); 518 out: 519 if (unlock) 520 gfs2_glock_dq_uninit(&d_gh); 521 if (error == -ENOENT) 522 return NULL; 523 return inode ? inode : ERR_PTR(error); 524 } 525 526 static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf) 527 { 528 const struct gfs2_inum_range *str = buf; 529 530 ir->ir_start = be64_to_cpu(str->ir_start); 531 ir->ir_length = be64_to_cpu(str->ir_length); 532 } 533 534 static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf) 535 { 536 struct gfs2_inum_range *str = buf; 537 538 str->ir_start = cpu_to_be64(ir->ir_start); 539 str->ir_length = cpu_to_be64(ir->ir_length); 540 } 541 542 static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) 543 { 544 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); 545 struct buffer_head *bh; 546 struct gfs2_inum_range_host ir; 547 int error; 548 549 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 550 if (error) 551 return error; 552 mutex_lock(&sdp->sd_inum_mutex); 553 554 error = gfs2_meta_inode_buffer(ip, &bh); 555 if (error) { 556 mutex_unlock(&sdp->sd_inum_mutex); 557 gfs2_trans_end(sdp); 558 return error; 559 } 560 561 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 562 563 if (ir.ir_length) { 564 *formal_ino = ir.ir_start++; 565 ir.ir_length--; 566 gfs2_trans_add_bh(ip->i_gl, bh, 1); 567 gfs2_inum_range_out(&ir, 568 bh->b_data + sizeof(struct gfs2_dinode)); 569 brelse(bh); 570 mutex_unlock(&sdp->sd_inum_mutex); 571 gfs2_trans_end(sdp); 572 return 0; 573 } 574 575 brelse(bh); 576 577 mutex_unlock(&sdp->sd_inum_mutex); 578 gfs2_trans_end(sdp); 579 580 return 1; 581 } 582 583 static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino) 584 { 585 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); 586 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode); 587 struct gfs2_holder gh; 588 struct buffer_head *bh; 589 struct gfs2_inum_range_host ir; 590 int error; 591 592 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 593 if (error) 594 return error; 595 596 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0); 597 if (error) 598 goto out; 599 mutex_lock(&sdp->sd_inum_mutex); 600 601 error = gfs2_meta_inode_buffer(ip, &bh); 602 if (error) 603 goto out_end_trans; 604 605 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 606 607 if (!ir.ir_length) { 608 struct buffer_head *m_bh; 609 u64 x, y; 610 __be64 z; 611 612 error = gfs2_meta_inode_buffer(m_ip, &m_bh); 613 if (error) 614 goto out_brelse; 615 616 z = *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)); 617 x = y = be64_to_cpu(z); 618 ir.ir_start = x; 619 ir.ir_length = GFS2_INUM_QUANTUM; 620 x += GFS2_INUM_QUANTUM; 621 if (x < y) 622 gfs2_consist_inode(m_ip); 623 z = cpu_to_be64(x); 624 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); 625 *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = z; 626 627 brelse(m_bh); 628 } 629 630 *formal_ino = ir.ir_start++; 631 ir.ir_length--; 632 633 gfs2_trans_add_bh(ip->i_gl, bh, 1); 634 gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 635 636 out_brelse: 637 brelse(bh); 638 out_end_trans: 639 mutex_unlock(&sdp->sd_inum_mutex); 640 gfs2_trans_end(sdp); 641 out: 642 gfs2_glock_dq_uninit(&gh); 643 return error; 644 } 645 646 static int pick_formal_ino(struct gfs2_sbd *sdp, u64 *inum) 647 { 648 int error; 649 650 error = pick_formal_ino_1(sdp, inum); 651 if (error <= 0) 652 return error; 653 654 error = pick_formal_ino_2(sdp, inum); 655 656 return error; 657 } 658 659 /** 660 * create_ok - OK to create a new on-disk inode here? 661 * @dip: Directory in which dinode is to be created 662 * @name: Name of new dinode 663 * @mode: 664 * 665 * Returns: errno 666 */ 667 668 static int create_ok(struct gfs2_inode *dip, const struct qstr *name, 669 unsigned int mode) 670 { 671 int error; 672 673 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 674 if (error) 675 return error; 676 677 /* Don't create entries in an unlinked directory */ 678 if (!dip->i_inode.i_nlink) 679 return -EPERM; 680 681 error = gfs2_dir_check(&dip->i_inode, name, NULL); 682 switch (error) { 683 case -ENOENT: 684 error = 0; 685 break; 686 case 0: 687 return -EEXIST; 688 default: 689 return error; 690 } 691 692 if (dip->i_di.di_entries == (u32)-1) 693 return -EFBIG; 694 if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) 695 return -EMLINK; 696 697 return 0; 698 } 699 700 static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, 701 unsigned int *uid, unsigned int *gid) 702 { 703 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 704 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { 705 if (S_ISDIR(*mode)) 706 *mode |= S_ISUID; 707 else if (dip->i_inode.i_uid != current_fsuid()) 708 *mode &= ~07111; 709 *uid = dip->i_inode.i_uid; 710 } else 711 *uid = current_fsuid(); 712 713 if (dip->i_inode.i_mode & S_ISGID) { 714 if (S_ISDIR(*mode)) 715 *mode |= S_ISGID; 716 *gid = dip->i_inode.i_gid; 717 } else 718 *gid = current_fsgid(); 719 } 720 721 static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) 722 { 723 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 724 int error; 725 726 if (gfs2_alloc_get(dip) == NULL) 727 return -ENOMEM; 728 729 dip->i_alloc->al_requested = RES_DINODE; 730 error = gfs2_inplace_reserve(dip); 731 if (error) 732 goto out; 733 734 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0); 735 if (error) 736 goto out_ipreserv; 737 738 *no_addr = gfs2_alloc_di(dip, generation); 739 740 gfs2_trans_end(sdp); 741 742 out_ipreserv: 743 gfs2_inplace_release(dip); 744 out: 745 gfs2_alloc_put(dip); 746 return error; 747 } 748 749 /** 750 * init_dinode - Fill in a new dinode structure 751 * @dip: the directory this inode is being created in 752 * @gl: The glock covering the new inode 753 * @inum: the inode number 754 * @mode: the file permissions 755 * @uid: 756 * @gid: 757 * 758 */ 759 760 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 761 const struct gfs2_inum_host *inum, unsigned int mode, 762 unsigned int uid, unsigned int gid, 763 const u64 *generation, dev_t dev, struct buffer_head **bhp) 764 { 765 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 766 struct gfs2_dinode *di; 767 struct buffer_head *dibh; 768 struct timespec tv = CURRENT_TIME; 769 770 dibh = gfs2_meta_new(gl, inum->no_addr); 771 gfs2_trans_add_bh(gl, dibh, 1); 772 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 773 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 774 di = (struct gfs2_dinode *)dibh->b_data; 775 776 di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino); 777 di->di_num.no_addr = cpu_to_be64(inum->no_addr); 778 di->di_mode = cpu_to_be32(mode); 779 di->di_uid = cpu_to_be32(uid); 780 di->di_gid = cpu_to_be32(gid); 781 di->di_nlink = 0; 782 di->di_size = 0; 783 di->di_blocks = cpu_to_be64(1); 784 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 785 di->di_major = cpu_to_be32(MAJOR(dev)); 786 di->di_minor = cpu_to_be32(MINOR(dev)); 787 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 788 di->di_generation = cpu_to_be64(*generation); 789 di->di_flags = 0; 790 791 if (S_ISREG(mode)) { 792 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) || 793 gfs2_tune_get(sdp, gt_new_files_jdata)) 794 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); 795 } else if (S_ISDIR(mode)) { 796 di->di_flags |= cpu_to_be32(dip->i_di.di_flags & 797 GFS2_DIF_INHERIT_JDATA); 798 } 799 800 di->__pad1 = 0; 801 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); 802 di->di_height = 0; 803 di->__pad2 = 0; 804 di->__pad3 = 0; 805 di->di_depth = 0; 806 di->di_entries = 0; 807 memset(&di->__pad4, 0, sizeof(di->__pad4)); 808 di->di_eattr = 0; 809 di->di_atime_nsec = cpu_to_be32(tv.tv_nsec); 810 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 811 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 812 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 813 814 set_buffer_uptodate(dibh); 815 816 *bhp = dibh; 817 } 818 819 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 820 unsigned int mode, const struct gfs2_inum_host *inum, 821 const u64 *generation, dev_t dev, struct buffer_head **bhp) 822 { 823 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 824 unsigned int uid, gid; 825 int error; 826 827 munge_mode_uid_gid(dip, &mode, &uid, &gid); 828 if (!gfs2_alloc_get(dip)) 829 return -ENOMEM; 830 831 error = gfs2_quota_lock(dip, uid, gid); 832 if (error) 833 goto out; 834 835 error = gfs2_quota_check(dip, uid, gid); 836 if (error) 837 goto out_quota; 838 839 error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0); 840 if (error) 841 goto out_quota; 842 843 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); 844 gfs2_quota_change(dip, +1, uid, gid); 845 gfs2_trans_end(sdp); 846 847 out_quota: 848 gfs2_quota_unlock(dip); 849 out: 850 gfs2_alloc_put(dip); 851 return error; 852 } 853 854 static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, 855 struct gfs2_inode *ip) 856 { 857 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 858 struct gfs2_alloc *al; 859 int alloc_required; 860 struct buffer_head *dibh; 861 int error; 862 863 al = gfs2_alloc_get(dip); 864 if (!al) 865 return -ENOMEM; 866 867 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 868 if (error) 869 goto fail; 870 871 error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); 872 if (alloc_required < 0) 873 goto fail_quota_locks; 874 if (alloc_required) { 875 error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); 876 if (error) 877 goto fail_quota_locks; 878 879 al->al_requested = sdp->sd_max_dirres; 880 881 error = gfs2_inplace_reserve(dip); 882 if (error) 883 goto fail_quota_locks; 884 885 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 886 al->al_rgd->rd_length + 887 2 * RES_DINODE + 888 RES_STATFS + RES_QUOTA, 0); 889 if (error) 890 goto fail_ipreserv; 891 } else { 892 error = gfs2_trans_begin(sdp, RES_LEAF + 2 * RES_DINODE, 0); 893 if (error) 894 goto fail_quota_locks; 895 } 896 897 error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode)); 898 if (error) 899 goto fail_end_trans; 900 901 error = gfs2_meta_inode_buffer(ip, &dibh); 902 if (error) 903 goto fail_end_trans; 904 ip->i_inode.i_nlink = 1; 905 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 906 gfs2_dinode_out(ip, dibh->b_data); 907 brelse(dibh); 908 return 0; 909 910 fail_end_trans: 911 gfs2_trans_end(sdp); 912 913 fail_ipreserv: 914 if (dip->i_alloc->al_rgd) 915 gfs2_inplace_release(dip); 916 917 fail_quota_locks: 918 gfs2_quota_unlock(dip); 919 920 fail: 921 gfs2_alloc_put(dip); 922 return error; 923 } 924 925 static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) 926 { 927 int err; 928 size_t len; 929 void *value; 930 char *name; 931 struct gfs2_ea_request er; 932 933 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, 934 &name, &value, &len); 935 936 if (err) { 937 if (err == -EOPNOTSUPP) 938 return 0; 939 return err; 940 } 941 942 memset(&er, 0, sizeof(struct gfs2_ea_request)); 943 944 er.er_type = GFS2_EATYPE_SECURITY; 945 er.er_name = name; 946 er.er_data = value; 947 er.er_name_len = strlen(name); 948 er.er_data_len = len; 949 950 err = gfs2_ea_set_i(ip, &er); 951 952 kfree(value); 953 kfree(name); 954 955 return err; 956 } 957 958 /** 959 * gfs2_createi - Create a new inode 960 * @ghs: An array of two holders 961 * @name: The name of the new file 962 * @mode: the permissions on the new inode 963 * 964 * @ghs[0] is an initialized holder for the directory 965 * @ghs[1] is the holder for the inode lock 966 * 967 * If the return value is not NULL, the glocks on both the directory and the new 968 * file are held. A transaction has been started and an inplace reservation 969 * is held, as well. 970 * 971 * Returns: An inode 972 */ 973 974 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 975 unsigned int mode, dev_t dev) 976 { 977 struct inode *inode = NULL; 978 struct gfs2_inode *dip = ghs->gh_gl->gl_object; 979 struct inode *dir = &dip->i_inode; 980 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 981 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 982 int error; 983 u64 generation; 984 struct buffer_head *bh = NULL; 985 986 if (!name->len || name->len > GFS2_FNAMESIZE) 987 return ERR_PTR(-ENAMETOOLONG); 988 989 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); 990 error = gfs2_glock_nq(ghs); 991 if (error) 992 goto fail; 993 994 error = create_ok(dip, name, mode); 995 if (error) 996 goto fail_gunlock; 997 998 error = pick_formal_ino(sdp, &inum.no_formal_ino); 999 if (error) 1000 goto fail_gunlock; 1001 1002 error = alloc_dinode(dip, &inum.no_addr, &generation); 1003 if (error) 1004 goto fail_gunlock; 1005 1006 error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, 1007 LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 1008 if (error) 1009 goto fail_gunlock; 1010 1011 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); 1012 if (error) 1013 goto fail_gunlock2; 1014 1015 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), 1016 inum.no_addr, 1017 inum.no_formal_ino, 0); 1018 if (IS_ERR(inode)) 1019 goto fail_gunlock2; 1020 1021 error = gfs2_inode_refresh(GFS2_I(inode)); 1022 if (error) 1023 goto fail_gunlock2; 1024 1025 error = gfs2_acl_create(dip, GFS2_I(inode)); 1026 if (error) 1027 goto fail_gunlock2; 1028 1029 error = gfs2_security_init(dip, GFS2_I(inode)); 1030 if (error) 1031 goto fail_gunlock2; 1032 1033 error = link_dinode(dip, name, GFS2_I(inode)); 1034 if (error) 1035 goto fail_gunlock2; 1036 1037 if (bh) 1038 brelse(bh); 1039 return inode; 1040 1041 fail_gunlock2: 1042 gfs2_glock_dq_uninit(ghs + 1); 1043 if (inode && !IS_ERR(inode)) 1044 iput(inode); 1045 fail_gunlock: 1046 gfs2_glock_dq(ghs); 1047 fail: 1048 if (bh) 1049 brelse(bh); 1050 return ERR_PTR(error); 1051 } 1052 1053 /** 1054 * gfs2_rmdiri - Remove a directory 1055 * @dip: The parent directory of the directory to be removed 1056 * @name: The name of the directory to be removed 1057 * @ip: The GFS2 inode of the directory to be removed 1058 * 1059 * Assumes Glocks on dip and ip are held 1060 * 1061 * Returns: errno 1062 */ 1063 1064 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, 1065 struct gfs2_inode *ip) 1066 { 1067 struct qstr dotname; 1068 int error; 1069 1070 if (ip->i_di.di_entries != 2) { 1071 if (gfs2_consist_inode(ip)) 1072 gfs2_dinode_print(ip); 1073 return -EIO; 1074 } 1075 1076 error = gfs2_dir_del(dip, name); 1077 if (error) 1078 return error; 1079 1080 error = gfs2_change_nlink(dip, -1); 1081 if (error) 1082 return error; 1083 1084 gfs2_str2qstr(&dotname, "."); 1085 error = gfs2_dir_del(ip, &dotname); 1086 if (error) 1087 return error; 1088 1089 gfs2_str2qstr(&dotname, ".."); 1090 error = gfs2_dir_del(ip, &dotname); 1091 if (error) 1092 return error; 1093 1094 /* It looks odd, but it really should be done twice */ 1095 error = gfs2_change_nlink(ip, -1); 1096 if (error) 1097 return error; 1098 1099 error = gfs2_change_nlink(ip, -1); 1100 if (error) 1101 return error; 1102 1103 return error; 1104 } 1105 1106 /* 1107 * gfs2_unlink_ok - check to see that a inode is still in a directory 1108 * @dip: the directory 1109 * @name: the name of the file 1110 * @ip: the inode 1111 * 1112 * Assumes that the lock on (at least) @dip is held. 1113 * 1114 * Returns: 0 if the parent/child relationship is correct, errno if it isn't 1115 */ 1116 1117 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, 1118 const struct gfs2_inode *ip) 1119 { 1120 int error; 1121 1122 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) 1123 return -EPERM; 1124 1125 if ((dip->i_inode.i_mode & S_ISVTX) && 1126 dip->i_inode.i_uid != current_fsuid() && 1127 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) 1128 return -EPERM; 1129 1130 if (IS_APPEND(&dip->i_inode)) 1131 return -EPERM; 1132 1133 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 1134 if (error) 1135 return error; 1136 1137 error = gfs2_dir_check(&dip->i_inode, name, ip); 1138 if (error) 1139 return error; 1140 1141 return 0; 1142 } 1143 1144 /** 1145 * gfs2_readlinki - return the contents of a symlink 1146 * @ip: the symlink's inode 1147 * @buf: a pointer to the buffer to be filled 1148 * @len: a pointer to the length of @buf 1149 * 1150 * If @buf is too small, a piece of memory is kmalloc()ed and needs 1151 * to be freed by the caller. 1152 * 1153 * Returns: errno 1154 */ 1155 1156 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) 1157 { 1158 struct gfs2_holder i_gh; 1159 struct buffer_head *dibh; 1160 unsigned int x; 1161 int error; 1162 1163 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); 1164 error = gfs2_glock_nq(&i_gh); 1165 if (error) { 1166 gfs2_holder_uninit(&i_gh); 1167 return error; 1168 } 1169 1170 if (!ip->i_di.di_size) { 1171 gfs2_consist_inode(ip); 1172 error = -EIO; 1173 goto out; 1174 } 1175 1176 error = gfs2_meta_inode_buffer(ip, &dibh); 1177 if (error) 1178 goto out; 1179 1180 x = ip->i_di.di_size + 1; 1181 if (x > *len) { 1182 *buf = kmalloc(x, GFP_NOFS); 1183 if (!*buf) { 1184 error = -ENOMEM; 1185 goto out_brelse; 1186 } 1187 } 1188 1189 memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); 1190 *len = x; 1191 1192 out_brelse: 1193 brelse(dibh); 1194 out: 1195 gfs2_glock_dq_uninit(&i_gh); 1196 return error; 1197 } 1198 1199 static int 1200 __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1201 { 1202 struct buffer_head *dibh; 1203 int error; 1204 1205 error = gfs2_meta_inode_buffer(ip, &dibh); 1206 if (!error) { 1207 error = inode_setattr(&ip->i_inode, attr); 1208 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); 1209 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1210 gfs2_dinode_out(ip, dibh->b_data); 1211 brelse(dibh); 1212 } 1213 return error; 1214 } 1215 1216 /** 1217 * gfs2_setattr_simple - 1218 * @ip: 1219 * @attr: 1220 * 1221 * Called with a reference on the vnode. 1222 * 1223 * Returns: errno 1224 */ 1225 1226 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1227 { 1228 int error; 1229 1230 if (current->journal_info) 1231 return __gfs2_setattr_simple(ip, attr); 1232 1233 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0); 1234 if (error) 1235 return error; 1236 1237 error = __gfs2_setattr_simple(ip, attr); 1238 gfs2_trans_end(GFS2_SB(&ip->i_inode)); 1239 return error; 1240 } 1241 1242 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) 1243 { 1244 const struct gfs2_dinode_host *di = &ip->i_di; 1245 struct gfs2_dinode *str = buf; 1246 1247 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 1248 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); 1249 str->di_header.__pad0 = 0; 1250 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); 1251 str->di_header.__pad1 = 0; 1252 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 1253 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 1254 str->di_mode = cpu_to_be32(ip->i_inode.i_mode); 1255 str->di_uid = cpu_to_be32(ip->i_inode.i_uid); 1256 str->di_gid = cpu_to_be32(ip->i_inode.i_gid); 1257 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); 1258 str->di_size = cpu_to_be64(di->di_size); 1259 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); 1260 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); 1261 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); 1262 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); 1263 1264 str->di_goal_meta = cpu_to_be64(ip->i_goal); 1265 str->di_goal_data = cpu_to_be64(ip->i_goal); 1266 str->di_generation = cpu_to_be64(di->di_generation); 1267 1268 str->di_flags = cpu_to_be32(di->di_flags); 1269 str->di_height = cpu_to_be16(ip->i_height); 1270 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && 1271 !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ? 1272 GFS2_FORMAT_DE : 0); 1273 str->di_depth = cpu_to_be16(ip->i_depth); 1274 str->di_entries = cpu_to_be32(di->di_entries); 1275 1276 str->di_eattr = cpu_to_be64(di->di_eattr); 1277 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); 1278 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); 1279 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); 1280 } 1281 1282 void gfs2_dinode_print(const struct gfs2_inode *ip) 1283 { 1284 const struct gfs2_dinode_host *di = &ip->i_di; 1285 1286 printk(KERN_INFO " no_formal_ino = %llu\n", 1287 (unsigned long long)ip->i_no_formal_ino); 1288 printk(KERN_INFO " no_addr = %llu\n", 1289 (unsigned long long)ip->i_no_addr); 1290 printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size); 1291 printk(KERN_INFO " blocks = %llu\n", 1292 (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); 1293 printk(KERN_INFO " i_goal = %llu\n", 1294 (unsigned long long)ip->i_goal); 1295 printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags); 1296 printk(KERN_INFO " i_height = %u\n", ip->i_height); 1297 printk(KERN_INFO " i_depth = %u\n", ip->i_depth); 1298 printk(KERN_INFO " di_entries = %u\n", di->di_entries); 1299 printk(KERN_INFO " di_eattr = %llu\n", 1300 (unsigned long long)di->di_eattr); 1301 } 1302 1303